diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index 30c24d9..140404a 100644
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -1209,7 +1209,12 @@ private static void populateLlapDaemonVarsSet(Set<String> llapDaemonVarsSetLocal
         "joins unnecessary memory will be allocated and then trimmed."),
     HIVEHYBRIDGRACEHASHJOINBLOOMFILTER("hive.mapjoin.hybridgrace.bloomfilter", true, "Whether to " +
         "use BloomFilter in Hybrid grace hash join to minimize unnecessary spilling."),
-
+    HIVEMAPJOINFULLOUTER("hive.mapjoin.full.outer", true,
+        "Whether to use MapJoin for FULL OUTER JOINs."),
+    HIVE_TEST_MAPJOINFULLOUTER_OVERRIDE("hive.test.mapjoin.full.outer.override", false,
+        "Internal use only; used to override the hive.mapjoin.full.outer setting. " +
+        "The default is false.",
+        true),
     HIVESMBJOINCACHEROWS("hive.smbjoin.cache.rows", 10000,
         "How many rows with the same key value should be cached in memory per smb joined table."),
     HIVEGROUPBYMAPINTERVAL("hive.groupby.mapaggr.checkinterval", 100000,
diff --git data/files/fullouter_long_big_1a.txt data/files/fullouter_long_big_1a.txt
new file mode 100644
index 0000000..8cf831f
--- /dev/null
+++ data/files/fullouter_long_big_1a.txt
@@ -0,0 +1,11 @@
+-5310365297525168078
+-6187919478609154811
+968819023021777205
+3313583664488247651
+-5206670856103795573
+\N
+-6187919478609154811
+1569543799237464101
+-6187919478609154811
+-8460550397108077433
+-6187919478609154811
diff --git data/files/fullouter_long_big_1a_nonull.txt data/files/fullouter_long_big_1a_nonull.txt
new file mode 100644
index 0000000..b2325ad
--- /dev/null
+++ data/files/fullouter_long_big_1a_nonull.txt
@@ -0,0 +1,10 @@
+1569543799237464101
+-6187919478609154811
+968819023021777205
+-8460550397108077433
+-6187919478609154811
+-5310365297525168078
+-6187919478609154811
+-5206670856103795573
+3313583664488247651
+-6187919478609154811
diff --git data/files/fullouter_long_big_1b.txt data/files/fullouter_long_big_1b.txt
new file mode 100644
index 0000000..87c2b3c
--- /dev/null
+++ data/files/fullouter_long_big_1b.txt
@@ -0,0 +1,13 @@
+\N
+31713
+31713
+31713
+31713
+32030
+31713
+-25394
+31713
+31713
+31713
+31713
+31713
diff --git data/files/fullouter_long_big_1c.txt data/files/fullouter_long_big_1c.txt
new file mode 100644
index 0000000..2d13c26
--- /dev/null
+++ data/files/fullouter_long_big_1c.txt
@@ -0,0 +1,11 @@
+1928928239,\N
+-1437463633,YYXPPCH
+-1437463633,TKTKGVGFW
+1725068083,MKSCCE
+1928928239,\N
+\N,ABBZ
+1928928239,AMKTIWQ
+-1437463633,JU
+1928928239,VAQHVRI
+-1437463633,SOWDWMS
+-1437463633,\N
diff --git data/files/fullouter_long_big_1d.txt data/files/fullouter_long_big_1d.txt
new file mode 100644
index 0000000..4137f67
--- /dev/null
+++ data/files/fullouter_long_big_1d.txt
@@ -0,0 +1,12 @@
+-702028721
+-702028721
+-1780951928
+-670834064
+-814597051
+\N
+-814597051
+-814597051
+-702028721
+-2038654700
+\N
+-814597051
diff --git data/files/fullouter_long_small_1a.txt data/files/fullouter_long_small_1a.txt
new file mode 100644
index 0000000..45d5825
--- /dev/null
+++ data/files/fullouter_long_small_1a.txt
@@ -0,0 +1,54 @@
+-1339636982994067311,2000-06-20
+-2575185053386712613,2105-01-21
+\N,2098-02-10
+-6784441713807772877,1845-02-16
+\N,2024-01-23
+-4224290881682877258,2185-07-08
+-614848861623872247,2101-05-25
+-2098090254092150988,2163-05-26
+434940853096155515,2275-02-08
+3873405809071478736,2034-06-09
+-2184423060953067642,1880-10-06 +7297177530102477725,1921-05-11 +7937120928560087303,2083-03-14 +\N,2242-02-08 +-2688622006344936758,2129-01-11 +214451696109242839,1977-01-04 +-4961171400048338491,2196-08-10 +4436884039838843341,2031-05-23 +2438535236662373438,1916-01-10 +6049335087268933751,2282-06-09 +8755921538765428593,1827-05-01 +5252407779338300447,2039-03-10 +-2184423060953067642,1853-07-06 +7297177530102477725,1926-04-12 +-2098090254092150988,1817-03-12 +-5754527700632192146,1958-07-15 +-614848861623872247,2112-11-09 +5246983111579595707,1817-07-01 +-2098090254092150988,2219-12-23 +-5706981533666803767,2151-06-09 +7297177530102477725,2125-08-26 +-7707546703881534780,2134-08-20 +214451696109242839,2179-04-18 +3845554233155411208,1805-11-10 +3905351789241845882,2045-12-05 +2438535236662373438,2026-06-23 +-2688622006344936758,1948-10-15 +6049335087268933751,2086-12-17 +-2575185053386712613,1809-07-12 +-327698348664467755,2222-10-15 +-4224290881682877258,1813-05-17 +3873405809071478736,2164-04-23 +-5706981533666803767,1800-09-20 +214451696109242839,1855-05-12 +2438535236662373438,1881-09-16 +5252407779338300447,2042-04-26 +-3655445881497026796,2108-08-16 +3905351789241845882,1866-07-28 +-6784441713807772877,2054-06-17 +5246983111579595707,2260-05-11 +-1339636982994067311,2008-12-03 +3873405809071478736,1918-11-20 +-4224290881682877258,2120-01-16 +3845554233155411208,2264-04-05 diff --git data/files/fullouter_long_small_1a_nonull.txt data/files/fullouter_long_small_1a_nonull.txt new file mode 100644 index 0000000..bf94d5a --- /dev/null +++ data/files/fullouter_long_small_1a_nonull.txt @@ -0,0 +1,51 @@ +5246983111579595707,1817-07-01 +4436884039838843341,2031-05-23 +-4224290881682877258,1813-05-17 +-4961171400048338491,2196-08-10 +-2575185053386712613,2105-01-21 +5252407779338300447,2042-04-26 +-614848861623872247,2101-05-25 +-2098090254092150988,2163-05-26 +2438535236662373438,1881-09-16 +214451696109242839,2179-04-18 +2438535236662373438,2026-06-23 +-2184423060953067642,1853-07-06 +3873405809071478736,2164-04-23 +214451696109242839,1855-05-12 +-6784441713807772877,1845-02-16 +-2688622006344936758,1948-10-15 +7297177530102477725,1921-05-11 +-2575185053386712613,1809-07-12 +3905351789241845882,2045-12-05 +3845554233155411208,1805-11-10 +-3655445881497026796,2108-08-16 +3905351789241845882,1866-07-28 +-1339636982994067311,2008-12-03 +7297177530102477725,2125-08-26 +7297177530102477725,1926-04-12 +-5706981533666803767,1800-09-20 +6049335087268933751,2282-06-09 +3845554233155411208,2264-04-05 +8755921538765428593,1827-05-01 +-1339636982994067311,2000-06-20 +-2098090254092150988,1817-03-12 +3873405809071478736,2034-06-09 +2438535236662373438,1916-01-10 +5246983111579595707,2260-05-11 +-5706981533666803767,2151-06-09 +-614848861623872247,2112-11-09 +-327698348664467755,2222-10-15 +-2184423060953067642,1880-10-06 +434940853096155515,2275-02-08 +-4224290881682877258,2120-01-16 +-5754527700632192146,1958-07-15 +-4224290881682877258,2185-07-08 +-2098090254092150988,2219-12-23 +-7707546703881534780,2134-08-20 +214451696109242839,1977-01-04 +-2688622006344936758,2129-01-11 +7937120928560087303,2083-03-14 +-6784441713807772877,2054-06-17 +3873405809071478736,1918-11-20 +6049335087268933751,2086-12-17 +5252407779338300447,2039-03-10 diff --git data/files/fullouter_long_small_1b.txt data/files/fullouter_long_small_1b.txt new file mode 100644 index 0000000..7d45fe4 --- /dev/null +++ data/files/fullouter_long_small_1b.txt @@ -0,0 +1,72 @@ +2748,2298-06-20 21:01:24 +11232,2533-11-26 12:22:18 
+\N,2124-05-07 15:01:19.021 +3198,2428-06-13 16:21:33.955 +-7624,2219-12-03 17:07:19 +24870,2752-12-26 12:32:23.03685163 +14865,2943-03-21 00:42:10.505 +-8624,2644-05-04 04:45:07.839 +-30059,2269-05-04 21:23:44.000339209 +14865,2079-10-06 16:54:35.117 +-8435,2834-12-06 16:38:18.901 +10553,2168-05-05 21:10:59.000152113 +-8624,2282-03-28 07:58:16 +-15361,2219-09-15 20:15:03.000169887 +-14172,1918-09-13 11:44:24.496926711 +26484,1919-03-04 07:32:37.519 +-14172,2355-01-14 23:23:34 +-24775,2920-08-06 15:58:28.261059449 +-23117,2037-01-05 21:52:30.685952759 +17125,2236-07-14 01:54:40.927230276 +21181,2253-03-12 11:55:48.332 +-7373,2662-10-28 12:07:02.000526564 +-8087,2550-06-26 23:57:42.588007617 +29407,2385-12-14 06:03:39.597 +21181,2434-02-20 00:46:29.633 +-14172,2809-06-07 02:10:58 +13598,2421-05-20 14:18:31.000264698 +2748,2759-02-13 18:04:36.000307355 +-22422,1949-03-13 00:07:53.075 +26484,2953-03-10 02:05:26.508953676 +4510,2777-03-24 03:44:28.000169723 +-24775,2035-03-26 08:11:23.375224153 +-30059,2713-10-13 09:28:49 +-20517,2774-06-23 12:04:06.5 +11232,2038-04-06 14:53:59 +32030,2101-09-09 07:35:05.145 +-29600,2333-11-02 15:06:30 +-30306,2619-05-24 10:35:58.000774018 +-7624,2289-08-28 00:14:34 +-4279,2470-08-12 11:21:14.000955747 +-4279,2214-09-10 03:53:06 +-26998,2428-12-26 07:53:45.96925825 +17125,2629-11-15 15:34:52 +-8087,2923-07-02 11:40:26.115 +2632,2561-12-15 15:42:27 +21436,2696-05-08 05:19:24.112 +\N,2971-08-07 12:02:11.000948152 +-7624,2623-03-20 03:18:45.00006465 +-26998,2926-07-18 09:02:46.077 +11232,2507-01-27 22:04:22.49661421 +-30059,2420-12-10 22:12:30 +-15427,2355-01-08 12:34:11.617 +3198,2223-04-14 13:20:49 +-19167,2319-08-26 11:07:11.268 +14865,2220-02-28 03:41:36 +-20517,2233-12-20 04:06:56.666522799 +-15427,2046-06-07 22:58:40.728 +2748,2862-04-20 13:12:39.482805897 +-8435,2642-02-07 11:45:04.353231638 +-19167,2230-12-22 20:25:39.000242111 +-15427,2023-11-09 19:31:21 +13598,2909-06-25 23:22:50 +21436,2526-09-22 23:44:55 +-15361,2434-08-13 20:37:07.000172979 +4510,2293-01-17 13:47:41.00001006 +-8624,2120-02-15 15:36:40.000758423 +-22422,2337-07-19 06:33:02.000353352 +-26998,2268-08-04 12:48:11.848006292 +-22422,2982-12-28 06:30:26.000883228 +\N,2933-06-20 11:48:09.000839488 +3198,2736-12-20 03:59:50.343550301 +-20824,2478-11-05 00:28:05 diff --git data/files/fullouter_long_small_1c.txt data/files/fullouter_long_small_1c.txt new file mode 100644 index 0000000..ff323d3 --- /dev/null +++ data/files/fullouter_long_small_1c.txt @@ -0,0 +1,81 @@ +-1093006502,-69.55665828 +452719211,83003.43722 +1242586043,71.1485 +-934092157,-7843850349.57130038 +294598722,-3542.6 +284554389,5.727146 +90660785,12590.288613 +-99948814,-38076694.3981 +466567142,-9763217822.129028 +1909136587,-8610.078036935181 +1242586043,-4 +\N,1.089120893565337 +1039864870,987601.57 +-466171792,0 +-1681455031,-6.4543 +1755897735,-39.965207 +1585021913,745222.66808954 +448130683,-4302.485366846491 +193709887,0.8 +-424713789,0.48 +1585021913,607.22747 +-1250662632,5454127198.951479 +294598722,-9377326244.444 +193709887,-19889.83 +1039864870,0.7 +1242586043,-749975924224.63 +-1250662632,-544.554649 +-1740848088,-9.157 +-369457052,7.7 +-369457052,560.11907883090455 +90660785,-4564.517185 +466567142,-58810.60586 +466567142,196.5785295398584 +1738753776,1525.280459649262 +1816559437,-1035.7009 +-1490239076,92253.232096 +1039864870,94.04 +560745412,678.25 +-466171792,4227.5344 +1561921421,53050.55 +-99948814,-96386.438 +1519948464,152 +1719049112,-7888197 +-793950320,-16 +-466171792,69.9 
+1738753776,-99817635066320.2416 +1091836730,0.02 +891262439,-0.04 +452719211,3020.2938930744636 +-2048404259,3939387044.1 +698032489,-330457.4292625839 +-1197550983,-0.5588796922 +-2123273881,-55.89198 +-2048404259,-0.3222960446251 +1585021913,-5762331.06697112 +1785750809,47443.115 +1909136587,181.07681535944 +1801735854,-1760956929364.267 +\N,4.26165227 +1801735854,-438541294.7 +150678276,-8278 +1479580778,92077343080.7 +1091836730,-5017.14 +193709887,-0.5663 +-1681455031,-11105.372477 +-1250662632,93104 +-1197550983,0.1 +\N,682070836.2649603 +-1197550983,71852.8338674412613 +1561921421,-5.405 +-1740848088,0.506394259 +150678276,15989394.8436 +-793950320,-0.1 +-1740848088,901.441 +-477147437,6 +-1264372462,0.883 +-2123273881,3.959 +-1264372462,-6993985240226 +-1264372462,-899 +-243940373,-97176129669.654953 +-243940373,-583.258 diff --git data/files/fullouter_long_small_1d.txt data/files/fullouter_long_small_1d.txt new file mode 100644 index 0000000..9778d3f --- /dev/null +++ data/files/fullouter_long_small_1d.txt @@ -0,0 +1,39 @@ +533298451 +1164387380 +1614287784 +1635405412 +-1912571616 +-894799664 +-1210744742 +-1014271154 +-747044796 +-1003639073 +436878811 +-1323620496 +-1379355738 +-1712018127 +246169862 +1431997749 +670834064 +1780951928 +-707688773 +1997943409 +1372592319 +-932176731 +162858059 +-683339273 +-497171161 +699863556 +1685473722 +41376947 +-1036083124 +1825107160 +-2038654700 +2119085509 +260588085 +-1792852276 +1831520491 +103640700 +\N +699007128 +1840266070 diff --git data/files/fullouter_multikey_big_1a.txt data/files/fullouter_multikey_big_1a.txt new file mode 100644 index 0000000..fe38c7b --- /dev/null +++ data/files/fullouter_multikey_big_1a.txt @@ -0,0 +1,13 @@ +22767,-1969080993 +-17582,-1730236061 +3556,\N +-17582,1082230084 +-17582,827141667 +1499,371855128 +-17582,9637312 +\N,1082230084 +-6131,-1969080993 +3556,-1969080993 +\N,\N +-18222,-1969080993 +-17582,267529350 diff --git data/files/fullouter_multikey_big_1a_nonull.txt data/files/fullouter_multikey_big_1a_nonull.txt new file mode 100644 index 0000000..40e84b0 --- /dev/null +++ data/files/fullouter_multikey_big_1a_nonull.txt @@ -0,0 +1,10 @@ +-17582,1082230084 +22767,-1969080993 +-17582,827141667 +-17582,-1730236061 +3556,-1969080993 +-6131,-1969080993 +-18222,-1969080993 +1499,371855128 +-17582,267529350 +-17582,9637312 diff --git data/files/fullouter_multikey_big_1b.txt data/files/fullouter_multikey_big_1b.txt new file mode 100644 index 0000000..40cfb9a --- /dev/null +++ data/files/fullouter_multikey_big_1b.txt @@ -0,0 +1,17 @@ +2061-12-19 22:10:32.000628309,21635,ANCO +\N,21635,ANCO +2686-05-23 07:46:46.565832918,13212,NCYBDW +2082-07-14 04:00:40.695380469,12556,NCYBDW +2188-06-04 15:03:14.963259704,9468,AAA +2608-02-23 23:44:02.546440891,26184,NCYBDW +2093-04-10 23:36:54.846,\N,\N +2898-10-01 22:27:02.000871113,10361,NCYBDW +2306-06-21 11:02:00.143124239,1446,\N +\N,-6909,\N +\N,\N,\N +2306-06-21 11:02:00.143124239,-6909,NCYBDW +2093-04-10 23:36:54.846,1446,GHZVPWFO +\N,\N,CCWYD +2686-05-23 07:46:46.565832918,\N,GHZVPWFO +2093-04-10 23:36:54.846,28996,Q +2299-11-15 16:41:30.401,-31077,NCYBDW diff --git data/files/fullouter_multikey_small_1a.txt data/files/fullouter_multikey_small_1a.txt new file mode 100644 index 0000000..4e0742c --- /dev/null +++ data/files/fullouter_multikey_small_1a.txt @@ -0,0 +1,92 @@ +23015,258882280 +23015,-276888585 +21186,-586336015 +-22311,-2055239583 +3412,-1249487623 +\N,1082230084 +20156,-1618478138 +-17788,-738743861 +-24206,-1456409156 +30353,2044473567 
+20969,-1995259010 +-23457,-63842445 +3412,-2081156563 +-6131,-1969080993 +23015,-252525791 +30353,1364268303 +23015,564751472 +15404,1078466156 +4586,-586336015 +-4117,-1386947816 +-26894,-63842445 +-17788,-1361776766 +-7386,-2112062470 +23015,-1893013623 +30353,1241923267 +-24206,641361618 +-28129,-2055239583 +-20125,-1995259010 +16166,931172175 +31443,-1968665833 +-28313,837320573 +11460,1078466156 +15061,-63842445 +13672,-63842445 +14400,-825652334 +-7386,100736776 +26944,-1995259010 +-11868,97203778 +12089,-63842445 +-28137,-63842445 +3412,1253976194 +-980,2009785365 +16696,-63842445 +-11868,930596435 +4902,1078466156 +-17582,267529350 +-12252,964377504 +20156,963883665 +-11868,1658440922 +4779,-1995259010 +-7386,-1635102480 +-28313,51228026 +-11868,1052120431 +-980,-270600267 +-20900,1078466156 +\N,\N +20156,1165375499 +30353,-1507157031 +3412,-1196037018 +22934,-1695419330 +30353,105613996 +-17788,-872691214 +-980,-333603940 +30353,-1011627089 +-11868,-3536499 +-2407,1078466156 +23015,-217613200 +-28313,-706104224 +-980,712692345 +-11868,1456809245 +-17788,528419995 +-11868,-915441041 +-980,628784462 +30353,-1007182618 +23015,-696928205 +-980,356970043 +23015,-893234501 +-980,-465544127 +-5734,1078466156 +-980,-801821285 +26738,-2055239583 +8177,-1995259010 +-11868,1318114822 +3890,1411429004 +-6061,-586336015 +3412,-2132472060 +-15212,-2055239583 +-12252,1956403781 +5957,-1995259010 +-1787,-63842445 +20156,1855042153 +-980,1310479628 diff --git data/files/fullouter_multikey_small_1a_nonull.txt data/files/fullouter_multikey_small_1a_nonull.txt new file mode 100644 index 0000000..2a8b9a1 --- /dev/null +++ data/files/fullouter_multikey_small_1a_nonull.txt @@ -0,0 +1,90 @@ +16696,-63842445 +4586,-586336015 +26738,-2055239583 +-17788,-738743861 +-28313,-706104224 +-23457,-63842445 +-20900,1078466156 +-12252,964377504 +-28313,51228026 +-11868,-3536499 +11460,1078466156 +26944,-1995259010 +20156,1855042153 +-11868,97203778 +15061,-63842445 +-17788,528419995 +-26894,-63842445 +-28313,837320573 +20156,963883665 +-15212,-2055239583 +5957,-1995259010 +30353,-1011627089 +3890,1411429004 +-980,-333603940 +13672,-63842445 +-980,628784462 +23015,-252525791 +-11868,1052120431 +-980,356970043 +23015,-217613200 +-6061,-586336015 +-5734,1078466156 +-11868,1318114822 +23015,258882280 +-2407,1078466156 +12089,-63842445 +3412,-2132472060 +-28129,-2055239583 +-980,-270600267 +16166,931172175 +-7386,100736776 +4902,1078466156 +20969,-1995259010 +22934,-1695419330 +3412,-1249487623 +3412,1253976194 +21186,-586336015 +8177,-1995259010 +-7386,-1635102480 +-11868,1456809245 +-20125,-1995259010 +-980,-801821285 +-980,1310479628 +23015,564751472 +23015,-893234501 +4779,-1995259010 +-980,2009785365 +-24206,641361618 +30353,-1507157031 +14400,-825652334 +3412,-2081156563 +20156,-1618478138 +31443,-1968665833 +-22311,-2055239583 +30353,1241923267 +-11868,930596435 +-17788,-1361776766 +-24206,-1456409156 +-7386,-2112062470 +30353,1364268303 +23015,-1893013623 +-17788,-872691214 +30353,2044473567 +-28137,-63842445 +30353,105613996 +-6131,-1969080993 +-17582,267529350 +23015,-276888585 +-12252,1956403781 +23015,-696928205 +-11868,1658440922 +-1787,-63842445 +-11868,-915441041 +-980,-465544127 +30353,-1007182618 +-980,712692345 +20156,1165375499 +3412,-1196037018 +15404,1078466156 +-4117,-1386947816 diff --git data/files/fullouter_multikey_small_1b.txt data/files/fullouter_multikey_small_1b.txt new file mode 100644 index 0000000..b56a3f7 --- /dev/null +++ data/files/fullouter_multikey_small_1b.txt @@ -0,0 +1,118 @@ 
+2304-12-15 15:31:16,11101,YJCKKCR,-0.2 +2018-11-25 22:27:55.84,-12202,VBDBM,7506645.9537 +1957-03-06 09:57:31,-26373,NXLNNSO,2 +2332-06-14 07:02:42.32,-26373,XFFFDTQ,56845106806308.9 +2535-03-01 05:04:49.000525883,23663,ALIQKNXHE,-0.1665691 +2629-04-07 01:54:11,-6776,WGGFVFTW,6.8012851708 +2266-09-26 06:27:29.000284762,20223,EDYJJN,14 +2969-01-23 14:08:04.000667259,-18138,VDPN,8924831210.42768019 +2861-05-27 07:13:01.000848622,-19598,WKPXNLXS,29399 +2301-06-03 17:16:19,15332,ZVEUKC,0.5 +1980-09-13 19:57:15,\N,M,57650.7723 +2304-12-15 15:31:16,1301,T,-0.8 +2461-03-09 09:54:45.000982385,-16454,ZSMB,-991.43605 +2044-05-02 07:00:03.35,-8751,ZSMB,-453797242.029791752 +2409-09-23 10:33:27,2638,XSXR,-9926693851 +1941-10-16 02:19:36.000423663,-24459,AO,-821445414.4579712 +2512-10-06 03:03:03,-3465,VZQ,-49.51219 +2971-02-14 09:13:19,-16605,BVACIRP,-5.751278023 +2075-10-25 20:32:40.000792874,\N,\N,226612651968.36076 +2073-03-21 15:32:57.617920888,26425,MPRACIRYW,5 +2969-01-23 14:08:04.000667259,14500,WXLTRFQP,-23.8198 +2898-12-18 03:37:17,-24459,MHNBXPBM,14.23669356238481 +\N,\N,\N,-2207.3 +2391-01-17 15:28:37.00045143,16160,ZVEUKC,771355639420297.133 +2309-01-15 12:43:49,22821,ZMY,40.9 +2340-12-15 05:15:17.133588982,23663,HHTP,33383.8 +2969-01-23 14:08:04.000667259,-8913,UIMQ,9.178 +2145-10-15 06:58:42.831,2638,\N,-9784.82 +2888-05-08 08:36:55.182302102,5786,ZVEUKC,-56082455.033918 +2467-05-11 06:04:13.426693647,23196,EIBSDASR,-8.5548883801 +2829-06-04 08:01:47.836,22771,ZVEUKC,94317.75318 +2938-12-21 23:35:59.498,29362,ZMY,0.88 +2304-12-15 15:31:16,-13125,JFYW,6.086657 +2808-07-09 02:10:11.928498854,-19598,FHFX,0.3 +2083-06-07 09:35:19.383,-26373,MR,-394.0867 +2686-05-23 07:46:46.565832918,13212,NCYBDW,-917116793.4 +2969-01-23 14:08:04.000667259,-8913,UIMQ,-375994644577.315257 +2338-02-12 09:30:07,20223,CTH,-6154.763054 +2629-04-07 01:54:11,-6776,WGGFVFTW,41.77451507786646 +2242-08-04 07:51:46.905,20223,UCYXACQ,37.7288 +2637-03-12 22:25:46.385,-12923,PPTJPFR,5.4 +2304-12-15 15:31:16,8650,RLNO,0.71351747335 +2688-02-06 20:58:42.000947837,20223,PAIY,67661.735 +\N,\N,\N,-2.4 +2512-10-06 03:03:03,-3465,VZQ,0.4458 +2960-04-12 07:03:42.000366651,20340,CYZYUNSF,-96.3 +2461-03-09 09:54:45.000982385,-16454,ZSMB,-9575827.55396 +2512-10-06 03:03:03,1560,X,-922.6951584107 +2396-04-06 15:39:02.404013577,29661,ZSMB,0.76718326 +2409-09-23 10:33:27,2638,XSXR,0.4 +2969-01-23 14:08:04.000667259,6689,TFGVOGPJF,-0.01 +2333-07-28 09:59:26,23196,RKSK,37872288434740893.5 +2409-09-23 10:33:27,2638,XSXR,-162.95 +2357-05-08 07:09:09.000482799,6226,ZSMB,-472 +2304-12-15 15:31:16,15090,G,-4319470286240016.3 +2304-12-15 15:31:16,1301,T,61.302 +2105-01-04 16:27:45,23100,ZSMB,-83.2328 +2242-08-04 07:51:46.905,20223,UCYXACQ,-0.26149 +2637-03-12 22:25:46.385,-17786,HYEGQ,-84.169614329419 +1931-12-04 11:13:47.269597392,23196,HVJCQMTQL,-9697532.8994 +2897-08-10 15:21:47.09,23663,XYUVBED,6370 +2888-05-08 08:36:55.182302102,5786,ZVEUKC,57.62175257788037 +2145-10-15 06:58:42.831,2638,UANGISEXR,-5996.306 +2462-12-16 23:11:32.633305644,-26373,CB,67.41799 +2396-04-06 15:39:02.404013577,29661,ZSMB,-5151598.347 +2304-12-15 15:31:16,15090,G,975 +2512-10-06 03:03:03,32099,ARNZ,-0.41 +2188-06-04 15:03:14.963259704,9468,AAA,2.75496352 +2512-10-06 03:03:03,1560,X,761196.522 +2304-12-15 15:31:16,1301,T,2720.8 +1919-06-20 00:16:50.611028595,20223,ZKBC,-23 +2897-08-10 15:21:47.09,23663,XYUVBED,51.7323303273 +2086-04-09 00:03:10,20223,THXNJGFFV,-85184687349898.892 +2238-05-17 19:27:25.519,20223,KQCM,-0.01095 +2086-04-09 
00:03:10,20223,THXNJGFFV,482.5383411359219 +2480-10-02 09:31:37.000770961,-26373,NBN,-5875.5197252 +2086-04-09 00:03:10,20223,THXNJGFFV,0.4396861 +2759-11-26 22:19:55.410967136,-27454,ZMY,60.6025797 +2083-06-07 09:35:19.383,-26373,MR,67892053.02376094 +2882-05-20 07:21:25.221299462,23196,U,-9951044 +2971-02-14 09:13:19,-16605,BVACIRP,-27394351.3 +2512-10-06 03:03:03,24313,QBHUG,-8423.151573236 +2882-05-20 07:21:25.221299462,23196,U,-4244.926206619 +1905-04-20 13:42:25.000469776,2638,KAUUFF,7 +2410-05-03 13:44:56,2638,PHOR,-769088.176482 +2668-06-25 07:12:37.000970744,2638,TJE,-2.7796827 +2969-01-23 14:08:04.000667259,-32485,AGEPWWLJF,-48431309405.652522 +2410-05-03 13:44:56,2638,PHOR,93262.914526611 +2512-10-06 03:03:03,13195,CRJ,14 +2018-11-25 22:27:55.84,-12202,VBDBM,98790.713907420831 +2304-12-15 15:31:16,8650,RLNO,-0.4355 +2071-07-21 20:02:32.000250697,2638,NRUV,-66198.351092 +2525-05-12 15:59:35,-24459,SAVRGA,53106747151.8633 +2637-03-12 22:25:46.385,21841,CXTI,749563668434009.65 +2018-11-25 22:27:55.84,-22419,LOTLS,342.3726040228584 +2637-03-12 22:25:46.385,21841,CXTI,7362887891522.3782 +2038-10-12 09:15:33.000539653,-19598,YKNIAJW,-642807895924.66 +2957-05-07 10:41:46,20223,OWQT,-586953.153681 +2304-12-15 15:31:16,11101,YJCKKCR,1279917802.42 +2355-09-23 19:52:34.638084141,-19598,H,92.15 +2960-04-12 07:03:42.000366651,20340,CYZYUNSF,2.1577659 +2355-09-23 19:52:34.638084141,-19598,H,74179461.880493 +2969-01-23 14:08:04.000667259,-8913,UIMQ,-81 +\N,-12914,ZVEUKC,221 +2743-12-27 05:16:19.000573579,-12914,ZVEUKC,-811984611.5178497 +1957-02-01 14:00:29.000548421,-16085,ZVEUKC,-2312.8149 +2201-07-05 17:22:06.084206844,-24459,UBGT,1.5069483282 +2461-03-09 09:54:45.000982385,-16454,ZSMB,8694.89 +2169-04-02 06:30:32,23855,PDVQATOS,-1515597428 +2304-12-15 15:31:16,30285,GSJPSIYOU,0.2 +2913-07-17 15:06:58.041,-10206,\N,-0.2 +2169-04-02 06:30:32,23855,PDVQATOS,-4016.9608 +2759-11-26 22:19:55.410967136,-27454,ZMY,368 +2073-03-21 15:32:57.617920888,26425,MPRACIRYW,726945733.4193 +2304-12-15 15:31:16,11101,YJCKKCR,-0.5 +2462-12-16 23:11:32.633305644,-26373,CB,-582687 +2357-05-08 07:09:09.000482799,6226,ZSMB,-32.46 +2304-12-15 15:31:16,12587,OPW,-4.59489504 diff --git data/files/fullouter_string_big_1a.txt data/files/fullouter_string_big_1a.txt new file mode 100644 index 0000000..1cbcd05 --- /dev/null +++ data/files/fullouter_string_big_1a.txt @@ -0,0 +1,13 @@ +FTWURVH +QNCYBDW +UA +WXHJ +\N +WXHJ +PXLD +WXHJ +PXLD +WXHJ +WXHJ +MXGDMBD +PXLD diff --git data/files/fullouter_string_big_1a_nonull.txt data/files/fullouter_string_big_1a_nonull.txt new file mode 100644 index 0000000..a6566f2 --- /dev/null +++ data/files/fullouter_string_big_1a_nonull.txt @@ -0,0 +1,12 @@ +WXHJ +WXHJ +FTWURVH +MXGDMBD +UA +WXHJ +QNCYBDW +PXLD +PXLD +WXHJ +PXLD +WXHJ diff --git data/files/fullouter_string_big_1a_old.txt data/files/fullouter_string_big_1a_old.txt new file mode 100644 index 0000000..1fa51ad --- /dev/null +++ data/files/fullouter_string_big_1a_old.txt @@ -0,0 +1,13 @@ +WXHJ +WXHJ +WXHJ +WXHJ +WXHJ +QNCYBDW +PXLD +PXLD +PXLD +UA +\N +FTWURVH +MXGDMBD diff --git data/files/fullouter_string_small_1a.txt data/files/fullouter_string_small_1a.txt new file mode 100644 index 0000000..f223da0 --- /dev/null +++ data/files/fullouter_string_small_1a.txt @@ -0,0 +1,38 @@ +BDBMW,2278-04-27,2101-02-21 08:53:34.692 +FROPIK,2023-02-28,2467-05-11 06:04:13.426693647 +GOYJHW,1976-03-06,2805-07-10 10:51:57.00083302 +MXGDMBD,1880-11-01,2765-10-06 13:28:17.000688592 +CQMTQLI,2031-09-13,1927-02-13 08:39:25.000919094 
+,1985-01-22,2111-01-10 15:44:28 +IOQIDQBHU,2198-02-08,2073-03-21 15:32:57.617920888 +GSJPSIYOU,1948-07-17,2006-09-24 16:01:24.000239251 +\N,1865-11-08,2893-04-07 07:36:12 +BEP,2206-08-10,2331-10-09 10:59:51 +NADANUQMW,2037-10-19,2320-04-26 18:50:25.000426922 +\N,2250-04-22,2548-03-21 08:23:13.133573801 +ATZJTPECF,1829-10-16,2357-05-08 07:09:09.000482799 +IWEZJHKE,\N,\N +AARNZRVZQ,2002-10-23,2525-05-12 15:59:35 +BEP,2141-02-19,2521-06-09 01:20:07.121 +AARNZRVZQ,2000-11-13,2309-06-05 19:54:13 +LOTLS,1957-11-09,2092-06-07 06:42:30.000538454 +FROPIK,2124-10-01,2974-07-06 12:05:08.000146048 +KL,1980-09-22,2073-08-25 11:51:10.318 +\N,1915-02-22,2554-10-27 09:34:30 +WNGFTTY,1843-06-10,2411-01-28 20:03:59 +VNRXWQ,1883-02-06,2287-07-17 16:46:58.287 +QTSRKSKB,2144-01-13,2627-12-20 03:38:53.000389266 +GOYJHW,1959-04-27,\N +LOTLS,2099-08-04,2181-01-25 01:04:25.000030055 +CQMTQLI,2090-11-13,2693-03-17 16:19:55.82 +VNRXWQ,2276-11-16,2072-08-16 17:45:47.48349887 +LOTLS,2126-09-16,1977-12-15 15:28:56 +FTWURVH,1976-03-10,2683-11-22 13:07:04.66673556 +,2021-02-21,2802-04-21 18:48:18.5933838 +ZNOUDCR,\N,1988-04-23 08:40:21 +FROPIK,2214-02-09,1949-08-18 17:14:38.000703738 +SDA,2196-04-12,2462-10-26 19:28:12.733 +WNGFTTY,2251-08-16,2649-12-21 18:30:42.498 +GOYJHW,1993-04-07,1950-05-04 09:28:22.000114784 +FYW,1807-03-20,2305-08-17 01:32:44 +ATZJTPECF,2217-10-22,2808-10-20 16:01:24.558 diff --git data/files/fullouter_string_small_1a_nonull.txt data/files/fullouter_string_small_1a_nonull.txt new file mode 100644 index 0000000..6b97ef4 --- /dev/null +++ data/files/fullouter_string_small_1a_nonull.txt @@ -0,0 +1,35 @@ +LOTLS,2126-09-16,1977-12-15 15:28:56 +MXGDMBD,1880-11-01,2765-10-06 13:28:17.000688592 +WNGFTTY,2251-08-16,2649-12-21 18:30:42.498 +QTSRKSKB,2144-01-13,2627-12-20 03:38:53.000389266 +AARNZRVZQ,2002-10-23,2525-05-12 15:59:35 +BEP,2141-02-19,2521-06-09 01:20:07.121 +ZNOUDCR,\N,1988-04-23 08:40:21 +FROPIK,2023-02-28,2467-05-11 06:04:13.426693647 +GOYJHW,1993-04-07,1950-05-04 09:28:22.000114784 +CQMTQLI,2090-11-13,2693-03-17 16:19:55.82 +BDBMW,2278-04-27,2101-02-21 08:53:34.692 +AARNZRVZQ,2000-11-13,2309-06-05 19:54:13 +FYW,1807-03-20,2305-08-17 01:32:44 +,2021-02-21,2802-04-21 18:48:18.5933838 +VNRXWQ,1883-02-06,2287-07-17 16:46:58.287 +FROPIK,2124-10-01,2974-07-06 12:05:08.000146048 +LOTLS,2099-08-04,2181-01-25 01:04:25.000030055 +BEP,2206-08-10,2331-10-09 10:59:51 +WNGFTTY,1843-06-10,2411-01-28 20:03:59 +LOTLS,1957-11-09,2092-06-07 06:42:30.000538454 +CQMTQLI,2031-09-13,1927-02-13 08:39:25.000919094 +GOYJHW,1976-03-06,2805-07-10 10:51:57.00083302 +,1985-01-22,2111-01-10 15:44:28 +SDA,2196-04-12,2462-10-26 19:28:12.733 +ATZJTPECF,1829-10-16,2357-05-08 07:09:09.000482799 +GOYJHW,1959-04-27,\N +FTWURVH,1976-03-10,2683-11-22 13:07:04.66673556 +KL,1980-09-22,2073-08-25 11:51:10.318 +ATZJTPECF,2217-10-22,2808-10-20 16:01:24.558 +NADANUQMW,2037-10-19,2320-04-26 18:50:25.000426922 +FROPIK,2214-02-09,1949-08-18 17:14:38.000703738 +IWEZJHKE,\N,\N +GSJPSIYOU,1948-07-17,2006-09-24 16:01:24.000239251 +IOQIDQBHU,2198-02-08,2073-03-21 15:32:57.617920888 +VNRXWQ,2276-11-16,2072-08-16 17:45:47.48349887 diff --git data/files/fullouter_string_small_1a_old.txt data/files/fullouter_string_small_1a_old.txt new file mode 100644 index 0000000..505c403 --- /dev/null +++ data/files/fullouter_string_small_1a_old.txt @@ -0,0 +1,38 @@ +,2021-02-21,2802-04-21 18:48:18.5933838 +,1985-01-22,2111-01-10 15:44:28 +VNRXWQ,1883-02-06,2287-07-17 16:46:58.287 +VNRXWQ,2276-11-16,2072-08-16 17:45:47.48349887 +KL,1980-09-22,2073-08-25 
11:51:10.318 +FYW,1807-03-20,2305-08-17 01:32:44 +WNGFTTY,2251-08-16,2649-12-21 18:30:42.498 +WNGFTTY,1843-06-10,2411-01-28 20:03:59 +FTWURVH,1976-03-10,2683-11-22 13:07:04.66673556 +CQMTQLI,2031-09-13,1927-02-13 08:39:25.000919094 +CQMTQLI,2090-11-13,2693-03-17 16:19:55.82 +BEP,2141-02-19,2521-06-09 01:20:07.121 +BEP,2206-08-10,2331-10-09 10:59:51 +FROPIK,2023-02-28,2467-05-11 06:04:13.426693647 +FROPIK,2214-02-09,1949-08-18 17:14:38.000703738 +FROPIK,2124-10-01,2974-07-06 12:05:08.000146048 +SDA,2196-04-12,2462-10-26 19:28:12.733 +ATZJTPECF,2217-10-22,2808-10-20 16:01:24.558 +ATZJTPECF,1829-10-16,2357-05-08 07:09:09.000482799 +MXGDMBD,1880-11-01,2765-10-06 13:28:17.000688592 +IWEZJHKE,\N,\N +NADANUQMW,2037-10-19,2320-04-26 18:50:25.000426922 +GOYJHW,1993-04-07,1950-05-04 09:28:22.000114784 +GOYJHW,1976-03-06,2805-07-10 10:51:57.00083302 +GOYJHW,1959-04-27,\N +QTSRKSKB,2144-01-13,2627-12-20 03:38:53.000389266 +IOQIDQBHU,2198-02-08,2073-03-21 15:32:57.617920888 +ZNOUDCR,\N,1988-04-23 08:40:21 +AARNZRVZQ,2000-11-13,2309-06-05 19:54:13 +AARNZRVZQ,2002-10-23,2525-05-12 15:59:35 +BDBMW,2278-04-27,2101-02-21 08:53:34.692 +\N,1865-11-08,2893-04-07 07:36:12 +\N,2250-04-22,2548-03-21 08:23:13.133573801 +\N,1915-02-22,2554-10-27 09:34:30 +LOTLS,1957-11-09,2092-06-07 06:42:30.000538454 +LOTLS,2126-09-16,1977-12-15 15:28:56 +LOTLS,2099-08-04,2181-01-25 01:04:25.000030055 +GSJPSIYOU,1948-07-17,2006-09-24 16:01:24.000239251 diff --git itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/AbstractMapJoin.java itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/AbstractMapJoin.java index af446db..7c86bcf 100644 --- itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/AbstractMapJoin.java +++ itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/AbstractMapJoin.java @@ -19,6 +19,7 @@ import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.tez.ObjectCache; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; import org.apache.hadoop.hive.ql.exec.util.collectoroperator.CountCollectorTestOperator; import org.apache.hadoop.hive.ql.exec.util.collectoroperator.CountVectorCollectorTestOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; @@ -26,6 +27,8 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.MapJoinTestImplementation; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestData; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.CreateMapJoinResult; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.MapJoinPlanVariation; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.SmallTableGenerationParameters; import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerateUtil; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; @@ -74,23 +77,24 @@ public void bench() throws Exception { } protected void setupMapJoin(HiveConf hiveConf, long seed, int rowCount, - VectorMapJoinVariation vectorMapJoinVariation, MapJoinTestImplementation mapJoinImplementation, - String[] bigTableColumnNames, TypeInfo[] bigTableTypeInfos, int[] bigTableKeyColumnNums, - String[] smallTableValueColumnNames, TypeInfo[] smallTableValueTypeInfos, - int[] bigTableRetainColumnNums, - int[] smallTableRetainKeyColumnNums, int[] 
smallTableRetainValueColumnNums, - SmallTableGenerationParameters smallTableGenerationParameters) throws Exception { + VectorMapJoinVariation vectorMapJoinVariation, MapJoinTestImplementation mapJoinImplementation, + String[] bigTableColumnNames, TypeInfo[] bigTableTypeInfos, + int[] bigTableKeyColumnNums, + String[] smallTableValueColumnNames, TypeInfo[] smallTableValueTypeInfos, + int[] bigTableRetainColumnNums, + int[] smallTableRetainKeyColumnNums, int[] smallTableRetainValueColumnNums, + SmallTableGenerationParameters smallTableGenerationParameters) throws Exception { this.vectorMapJoinVariation = vectorMapJoinVariation; this.mapJoinImplementation = mapJoinImplementation; testDesc = new MapJoinTestDescription( hiveConf, vectorMapJoinVariation, - bigTableColumnNames, bigTableTypeInfos, + bigTableTypeInfos, bigTableKeyColumnNums, - smallTableValueColumnNames, smallTableValueTypeInfos, - bigTableRetainColumnNums, - smallTableRetainKeyColumnNums, smallTableRetainValueColumnNums, - smallTableGenerationParameters); + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + MapJoinPlanVariation.SHARED_SMALL_TABLE); // Prepare data. Good for ANY implementation variation. testData = new MapJoinTestData(rowCount, testDesc, seed, seed * 10); @@ -109,7 +113,7 @@ protected void setupMapJoin(HiveConf hiveConf, long seed, int rowCount, if (!isVectorOutput) { bigTableRows = VectorBatchGenerateUtil.generateRowObjectArray( - testDesc.bigTableKeyTypeInfos, testData.getBigTableBatchStream(), + testDesc.bigTableTypeInfos, testData.getBigTableBatchStream(), testData.getBigTableBatch(), testDesc.outputObjectInspectors); } else { @@ -141,9 +145,20 @@ protected static MapJoinOperator setupBenchmarkImplementation( (!isVectorOutput ? new CountCollectorTestOperator() : new CountVectorCollectorTestOperator()); - MapJoinOperator operator = + // UNDONE: We need to plumb down shareMapJoinTableContainer.... + CreateMapJoinResult createMapJoinResult = MapJoinTestConfig.createMapJoinImplementation( - mapJoinImplementation, testDesc, testCollectorOperator, testData, mapJoinDesc); + mapJoinImplementation, testDesc, testData, mapJoinDesc, + /* shareMapJoinTableContainer */ null); + MapJoinOperator operator = createMapJoinResult.mapJoinOperator; + MapJoinTableContainer mapJoinTableContainer = createMapJoinResult.mapJoinTableContainer; + + // Invoke initializeOp methods. + operator.initialize(testDesc.hiveConf, testDesc.inputObjectInspectors); + + // Fixup the mapJoinTables. 
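+    // The Small Table container built by MapJoinTestConfig above is installed directly at
+    // position 1 (position 0 is the Big Table), rather than being loaded through the normal
+    // hash-table load path.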
+ operator.setTestMapJoinTableContainer(1, mapJoinTableContainer, null); + return operator; } diff --git itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinMultiKeyBenchBase.java itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinMultiKeyBenchBase.java index c9da92a..aa88297 100644 --- itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinMultiKeyBenchBase.java +++ itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinMultiKeyBenchBase.java @@ -59,7 +59,8 @@ public void doSetup(VectorMapJoinVariation vectorMapJoinVariation, setupMapJoin(hiveConf, seed, rowCount, vectorMapJoinVariation, mapJoinImplementation, - bigTableColumnNames, bigTableTypeInfos, bigTableKeyColumnNums, + bigTableColumnNames, bigTableTypeInfos, + bigTableKeyColumnNums, smallTableValueColumnNames, smallTableValueTypeInfos, bigTableRetainColumnNums, smallTableRetainKeyColumnNums, smallTableRetainValueColumnNums, diff --git itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneLongKeyBenchBase.java itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneLongKeyBenchBase.java index a6b4719..60b2890 100644 --- itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneLongKeyBenchBase.java +++ itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneLongKeyBenchBase.java @@ -57,7 +57,8 @@ public void doSetup(VectorMapJoinVariation vectorMapJoinVariation, setupMapJoin(hiveConf, seed, rowCount, vectorMapJoinVariation, mapJoinImplementation, - bigTableColumnNames, bigTableTypeInfos, bigTableKeyColumnNums, + bigTableColumnNames, bigTableTypeInfos, + bigTableKeyColumnNums, smallTableValueColumnNames, smallTableValueTypeInfos, bigTableRetainColumnNums, smallTableRetainKeyColumnNums, smallTableRetainValueColumnNums, diff --git itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneStringKeyBenchBase.java itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneStringKeyBenchBase.java index 1b31038..937ede1 100644 --- itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneStringKeyBenchBase.java +++ itests/hive-jmh/src/main/java/org/apache/hive/benchmark/vectorization/mapjoin/MapJoinOneStringKeyBenchBase.java @@ -57,7 +57,8 @@ public void doSetup(VectorMapJoinVariation vectorMapJoinVariation, setupMapJoin(hiveConf, seed, rowCount, vectorMapJoinVariation, mapJoinImplementation, - bigTableColumnNames, bigTableTypeInfos, bigTableKeyColumnNums, + bigTableColumnNames, bigTableTypeInfos, + bigTableKeyColumnNums, smallTableValueColumnNames, smallTableValueTypeInfos, bigTableRetainColumnNums, smallTableRetainKeyColumnNums, smallTableRetainValueColumnNums, diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 99d3817..a93962e 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -558,6 +558,7 @@ minillaplocal.query.files=\ explainanalyze_2.q,\ explainuser_1.q,\ explainuser_4.q,\ + fullouter_mapjoin_1_optimized.q,\ groupby2.q,\ groupby_groupingset_bug.q,\ hybridgrace_hashjoin_1.q,\ @@ -729,6 +730,10 @@ minillaplocal.query.files=\ vector_complex_join.q,\ vector_decimal_2.q,\ vector_decimal_udf.q,\ + vector_full_outer_join.q,\ + 
vector_fullouter_mapjoin_1_fast.q,\
+  vector_fullouter_mapjoin_1_optimized.q,\
+  vector_fullouter_mapjoin_1_optimized_passthru.q,\
   vector_groupby_cube1.q,\
   vector_groupby_grouping_id1.q,\
   vector_groupby_grouping_id2.q,\
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java
index 02a67cb..3762ee5 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java
@@ -790,7 +790,16 @@ private boolean hasRightPairForLeft(int left, int right) {
   }
 
   private boolean hasAnyFiltered(int alias, List<Object> row) {
-    return row == dummyObj[alias] || hasFilter(alias) && JoinUtil.hasAnyFiltered(getFilterTag(row));
+    if (row == dummyObj[alias]) {
+      return true;
+    }
+    if (hasFilter(alias) && row != null) {
+      ShortWritable shortWritable = (ShortWritable) row.get(row.size() - 1);
+      if (shortWritable != null) {
+        return JoinUtil.hasAnyFiltered(shortWritable.get());
+      }
+    }
+    return false;
   }
 
   protected final boolean hasFilter(int alias) {
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
index 0a6e17a..931e78e 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java
@@ -668,6 +668,73 @@ private JSONObject outputPlan(Object work,
     return outputPlan(work, out, extended, jsonOutput, indent, "");
   }
 
+  private boolean isInvokeVectorization(Vectorization vectorization) {
+
+    boolean invokeFlag = true;  // Assume.
+
+    // The EXPLAIN VECTORIZATION option was specified.
+    final boolean desireOnly = this.work.isVectorizationOnly();
+    final VectorizationDetailLevel desiredVecDetailLevel =
+        this.work.isVectorizationDetailLevel();
+
+    switch (vectorization) {
+    case NON_VECTORIZED:
+      // Display all non-vectorized leaf objects unless ONLY.
+      if (desireOnly) {
+        invokeFlag = false;
+      }
+      break;
+    case SUMMARY:
+    case OPERATOR:
+    case EXPRESSION:
+    case DETAIL:
+      if (vectorization.rank < desiredVecDetailLevel.rank) {
+        // This detail not desired.
+        invokeFlag = false;
+      }
+      break;
+    case SUMMARY_PATH:
+    case OPERATOR_PATH:
+      if (desireOnly) {
+        if (vectorization.rank < desiredVecDetailLevel.rank) {
+          // Suppress headers and all objects below.
+          invokeFlag = false;
+        }
+      }
+      break;
+    default:
+      throw new RuntimeException("Unknown EXPLAIN vectorization " + vectorization);
+    }
+
+    return invokeFlag;
+  }
+
+  private boolean isInvokeNonVectorization(Vectorization vectorization) {
+
+    boolean invokeFlag = true;  // Assume.
+
+    // Do not display vectorization objects.
+    switch (vectorization) {
+    case SUMMARY:
+    case OPERATOR:
+    case EXPRESSION:
+    case DETAIL:
+      invokeFlag = false;
+      break;
+    case NON_VECTORIZED:
+      // No action.
+      break;
+    case SUMMARY_PATH:
+    case OPERATOR_PATH:
+      // Always include headers since they contain non-vectorized objects, too.
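+      // (invokeFlag stays true, so the header itself is still printed.)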
+ break; + default: + throw new RuntimeException("Unknown EXPLAIN vectorization " + vectorization); + } + + return invokeFlag; + } + @VisibleForTesting JSONObject outputPlan(Object work, PrintStream out, boolean extended, boolean jsonOutput, int indent, String appendToHeader) throws Exception { @@ -689,65 +756,17 @@ JSONObject outputPlan(Object work, PrintStream out, if (extended) { invokeFlag = Level.EXTENDED.in(xpl_note.explainLevels()); } else { - invokeFlag = Level.DEFAULT.in(xpl_note.explainLevels()); + invokeFlag = + Level.DEFAULT.in(xpl_note.explainLevels()) || + (this.work.isDebug() && Level.DEBUG.in(xpl_note.explainLevels())); } } if (invokeFlag) { Vectorization vectorization = xpl_note.vectorization(); if (this.work != null && this.work.isVectorization()) { - - // The EXPLAIN VECTORIZATION option was specified. - final boolean desireOnly = this.work.isVectorizationOnly(); - final VectorizationDetailLevel desiredVecDetailLevel = - this.work.isVectorizationDetailLevel(); - - switch (vectorization) { - case NON_VECTORIZED: - // Display all non-vectorized leaf objects unless ONLY. - if (desireOnly) { - invokeFlag = false; - } - break; - case SUMMARY: - case OPERATOR: - case EXPRESSION: - case DETAIL: - if (vectorization.rank < desiredVecDetailLevel.rank) { - // This detail not desired. - invokeFlag = false; - } - break; - case SUMMARY_PATH: - case OPERATOR_PATH: - if (desireOnly) { - if (vectorization.rank < desiredVecDetailLevel.rank) { - // Suppress headers and all objects below. - invokeFlag = false; - } - } - break; - default: - throw new RuntimeException("Unknown EXPLAIN vectorization " + vectorization); - } + invokeFlag = isInvokeVectorization(vectorization); } else { - // Do not display vectorization objects. - switch (vectorization) { - case SUMMARY: - case OPERATOR: - case EXPRESSION: - case DETAIL: - invokeFlag = false; - break; - case NON_VECTORIZED: - // No action. - break; - case SUMMARY_PATH: - case OPERATOR_PATH: - // Always include headers since they contain non-vectorized objects, too. - break; - default: - throw new RuntimeException("Unknown EXPLAIN vectorization " + vectorization); - } + invokeFlag = isInvokeNonVectorization(vectorization); } } if (invokeFlag) { @@ -825,64 +844,18 @@ JSONObject outputPlan(Object work, PrintStream out, if (extended) { invokeFlag = Level.EXTENDED.in(xpl_note.explainLevels()); } else { - invokeFlag = Level.DEFAULT.in(xpl_note.explainLevels()); + invokeFlag = + Level.DEFAULT.in(xpl_note.explainLevels()) || + (this.work.isDebug() && Level.DEBUG.in(xpl_note.explainLevels())); } } if (invokeFlag) { Vectorization vectorization = xpl_note.vectorization(); - if (this.work != null && this.work.isVectorization()) { - - // The EXPLAIN VECTORIZATION option was specified. - final boolean desireOnly = this.work.isVectorizationOnly(); - final VectorizationDetailLevel desiredVecDetailLevel = - this.work.isVectorizationDetailLevel(); - - switch (vectorization) { - case NON_VECTORIZED: - // Display all non-vectorized leaf objects unless ONLY. - if (desireOnly) { - invokeFlag = false; - } - break; - case SUMMARY: - case OPERATOR: - case EXPRESSION: - case DETAIL: - if (vectorization.rank < desiredVecDetailLevel.rank) { - // This detail not desired. - invokeFlag = false; - } - break; - case SUMMARY_PATH: - case OPERATOR_PATH: - if (desireOnly) { - if (vectorization.rank < desiredVecDetailLevel.rank) { - // Suppress headers and all objects below. 
- invokeFlag = false; - } - } - break; - default: - throw new RuntimeException("Unknown EXPLAIN vectorization " + vectorization); - } - } else { - // Do not display vectorization objects. - switch (vectorization) { - case SUMMARY: - case OPERATOR: - case EXPRESSION: - case DETAIL: - invokeFlag = false; - break; - case NON_VECTORIZED: - // No action. - break; - case SUMMARY_PATH: - case OPERATOR_PATH: - // Always include headers since they contain non-vectorized objects, too. - break; - default: - throw new RuntimeException("Unknown EXPLAIN vectorization " + vectorization); + if (invokeFlag) { + if (this.work != null && this.work.isVectorization()) { + invokeFlag = isInvokeVectorization(vectorization); + } else { + invokeFlag = isInvokeNonVectorization(vectorization); } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java index f45a012..fba78e9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java @@ -27,6 +27,7 @@ import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; +import org.apache.commons.lang.ArrayUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.ObjectPair; import org.apache.hadoop.hive.conf.Constants; @@ -41,12 +42,16 @@ import org.apache.hadoop.hive.ql.exec.persistence.KeyValueContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer.KeyValueHelper; +import org.apache.hadoop.hive.ql.exec.persistence.AbstractRowContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinRowContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; +import org.apache.hadoop.hive.ql.exec.persistence.ReusableGetAdaptorDirectAccess; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.NonMatchedSmallTableIterator; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; import org.apache.hadoop.hive.ql.exec.persistence.ObjectContainer; import org.apache.hadoop.hive.ql.exec.persistence.UnwrapRowContainer; import org.apache.hadoop.hive.ql.exec.spark.SparkUtilities; @@ -66,7 +71,9 @@ import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Writable; import org.apache.hive.common.util.ReflectionUtil; @@ -74,8 +81,8 @@ import org.slf4j.LoggerFactory; import com.google.common.annotations.VisibleForTesting; - import com.esotericsoftware.kryo.KryoException; +import com.google.common.base.Preconditions; /** * Map side Join operator implementation. 
@@ -105,6 +112,13 @@
   protected HybridHashTableContainer firstSmallTable; // The first small table;
                                                       // Only this table has spilled big table rows
 
+  protected transient boolean isFullOuterMapJoin;
+  protected transient boolean isFullOuterForwardKeysToIntersect;
+  protected transient boolean isFullOuterIntersect;
+
+  protected transient int fullOuterBigTableRetainSize;
+  protected transient MatchTracker matchTracker;
+
   protected transient boolean isTestingNoHashTableLoad;
   // Only used in bucket map join.
   private transient int numBuckets = -1;
@@ -177,6 +191,8 @@ protected void initializeOp(Configuration hconf) throws HiveException {
     hybridMapJoinLeftover = false;
     firstSmallTable = null;
 
+    dpFullOuterMapJoinInit();
+
     generateMapMetaData();
 
     isTestingNoHashTableLoad = HiveConf.getBoolVar(hconf,
@@ -252,6 +268,28 @@ protected void completeInitializationOp(Object[] os) throws HiveException {
     }
   }
 
+  private void dpFullOuterMapJoinInit() {
+
+    // This will be set during the first process call or during closeOp if no rows processed.
+    matchTracker = null;
+
+    isFullOuterMapJoin = (condn.length == 1 && condn[0].getType() == JoinDesc.FULL_OUTER_JOIN);
+    if (isFullOuterMapJoin) {
+      fullOuterBigTableRetainSize = conf.getRetainList().get(posBigTable).size();
+      isFullOuterForwardKeysToIntersect = !conf.isDynamicPartitionHashJoin();
+      if (isFullOuterForwardKeysToIntersect) {
+
+        // The auxiliary forward sends first-time match keys to the FULL OUTER INTERSECT MapJoin
+        // operator.
+        auxiliaryChildIndex = 1;
+      }
+      isFullOuterIntersect = conf.isFullOuterIntersect();
+    } else {
+      isFullOuterForwardKeysToIntersect = false;
+      isFullOuterIntersect = false;
+    }
+  }
+
   @VisibleForTesting
   public void setTestMapJoinTableContainer(int posSmallTable,
       MapJoinTableContainer testMapJoinTableContainer,
@@ -415,6 +453,27 @@ public void cleanUpInputFileChangedOp() throws HiveException {
     return dest.setFromRow(row, joinKeys[alias], joinKeysObjectInspectors[alias]);
   }
 
+  protected JoinUtil.JoinResult setMapJoinKeyNoNulls(
+      ReusableGetAdaptor dest, Object row, byte alias, MatchTracker matchTracker)
+      throws HiveException {
+    return dest.setFromRowNoNulls(row, joinKeys[alias], joinKeysObjectInspectors[alias], matchTracker);
+  }
+
+  protected void setMapJoinKeyNoResult(
+      ReusableGetAdaptor dest, Object row, byte alias, MatchTracker matchTracker)
+      throws HiveException {
+    dest.setFromRowNoResult(row, joinKeys[alias], joinKeysObjectInspectors[alias], matchTracker);
+  }
+
+  protected void forwardFirstTimeMatchToFullOuterIntersect(
+      Object firstTimeMatchRow, ObjectInspector outputOI)
+      throws HiveException {
+    Object standardFirstTimeMatchRow =
+        ObjectInspectorUtils.copyToStandardObject(
+            firstTimeMatchRow, inputObjInspectors[posBigTable], ObjectInspectorCopyOption.WRITABLE);
+    forwardAuxiliary(standardFirstTimeMatchRow, outputOI);
+  }
+
   protected MapJoinKey getRefKey(byte alias) {
     // We assume that since we are joining on the same key, all tables would have either
     // optimized or non-optimized key; hence, we can pass any key in any table as reference.
@@ -437,6 +496,10 @@ public void process(Object row, int tag) throws HiveException {
         for (byte pos = 0; pos < order.length; pos++) {
           if (pos != alias) {
             hashMapRowGetters[pos] = mapJoinTables[pos].createGetter(refKey);
+            if (isFullOuterMapJoin) {
+              matchTracker =
+                  ((ReusableGetAdaptorDirectAccess) hashMapRowGetters[pos]).createMatchTracker();
+            }
           }
         }
       }
@@ -463,7 +526,22 @@
         ReusableGetAdaptor adaptor;
         if (firstSetKey == null) {
           adaptor = firstSetKey = hashMapRowGetters[pos];
-          joinResult = setMapJoinKey(firstSetKey, row, alias);
+          if (!isFullOuterMapJoin) {
+            joinResult = setMapJoinKey(firstSetKey, row, alias);
+          } else if (!isFullOuterIntersect) {
+            // We do not match if the key has any NULLs.
+            joinResult = setMapJoinKeyNoNulls(firstSetKey, row, alias, matchTracker);
+            if (isFullOuterForwardKeysToIntersect &&
+                joinResult == JoinUtil.JoinResult.MATCH &&
+                matchTracker.getIsFirstMatch()) {
+              forwardFirstTimeMatchToFullOuterIntersect(row, outputObjInspector);
+            }
+          } else {
+            // For FULL OUTER MapJoin Intersect, we just look up the key.
+            // (NOTE: Keys with NULLs should not arrive here).
+            setMapJoinKeyNoResult(firstSetKey, row, alias, matchTracker);
+            return;
+          }
         } else {
           // Keys for all tables are the same, so only the first has to deserialize them.
           adaptor = hashMapRowGetters[pos];
@@ -544,8 +622,122 @@ protected void spillBigTableRow(MapJoinTableContainer hybridHtContainer, Object row)
     bigTable.add(row);
   }
 
+  protected void generateFullOuterSmallTableNoMatches() throws HiveException {
+
+    // FUTURE: Currently, in the MapJoinOperator, we only support FULL OUTER MapJoin for
+    // FUTURE: MapJoinBytesTableContainer. NOTE: Vectorization code will override this method.
+
+    if (!conf.isDynamicPartitionHashJoin() && !conf.isFullOuterIntersect()) {
+
+      // The FULL OUTER MapJoin INTERSECT operator does the non-match Small Table
+      // result work.
+      return;
+    }
+
+    MapJoinBytesTableContainer smallTable = null;
+    byte smallTablePos = -1;
+    for (byte pos = 0; pos < mapJoinTables.length; pos++) {
+      if (pos != conf.getPosBigTable()) {
+        smallTable = (MapJoinBytesTableContainer) mapJoinTables[pos];
+        smallTablePos = pos;
+        if (matchTracker == null) {
+
+          // When the process method isn't called (i.e. no rows), then we need to create the
+          // MatchTracker here.
+          //
+          ReusableGetAdaptor hashMapRowGetter = smallTable.createGetter(null);
+          matchTracker =
+              ((ReusableGetAdaptorDirectAccess) hashMapRowGetter).createMatchTracker();
+        }
+        break;
+      }
+    }
+    Preconditions.checkState(smallTablePos != -1);
+
+    boolean isSmallTableValuesOnly = false;
+    int[] smallTableValuesIndex = conf.getValueIndex(smallTablePos);
+    if (smallTableValuesIndex == null) {
+      List<Integer> valuesList = conf.getRetainList().get(smallTablePos);
+      smallTableValuesIndex =
+          ArrayUtils.toPrimitive(valuesList.toArray(new Integer[0]));
+      isSmallTableValuesOnly = true;
+    }
+    final int smallTableValuesIndexSize = smallTableValuesIndex.length;
+
+    // Our first output column for Small Table results is based on order. (The Big Table columns
+    // will all be NULL).
+    final int firstOutputColumnNum = (posBigTable == (byte) 0 ? fullOuterBigTableRetainSize : 0);
+
+    /*
+     * Create an iterator that produces each non-matched Small Table key and a ReusableRowContainer
+     * of the Small Table values.
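+     * Keys whose slots were marked in the MatchTracker during process() are skipped.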
+     */
+    NonMatchedSmallTableIterator nonMatchedIterator =
+        smallTable.createNonMatchedSmallTableIterator(matchTracker);
+    int nonMatchedKeyCount = 0;
+    int nonMatchedValueCount = 0;
+    while (nonMatchedIterator.isNext()) {
+      List<Object> keyObjList = nonMatchedIterator.getCurrentKey();
+
+      MapJoinRowContainer values = nonMatchedIterator.getCurrentRows();
+      AbstractRowContainer.RowIterator<List<Object>> iter = values.rowIter();
+      for (List<Object> valueObjList = iter.first();
+           valueObjList != null;
+           valueObjList = iter.next()) {
+
+        // Form the non-matched Small Table join result. We only fill in the Small Table columns,
+        // so the Big Table retained columns are NULLs from the new allocation.
+
+        Object[] row = new Object[fullOuterBigTableRetainSize + smallTableValuesIndexSize];
+        int outputColumnNum = firstOutputColumnNum;
+
+        if (isSmallTableValuesOnly) {
+          for (int i = 0; i < smallTableValuesIndexSize; i++) {
+            row[outputColumnNum++] = valueObjList.get(smallTableValuesIndex[i]);
+          }
+        } else {
+          for (int i = 0; i < smallTableValuesIndexSize; i++) {
+            final int index = smallTableValuesIndex[i];
+
+            if (index >= 0) {
+
+              // Zero and above numbers indicate a big table key is needed for the
+              // small table result "area".
+
+              row[outputColumnNum++] = keyObjList.get(index);
+            } else {
+
+              // Negative numbers indicate a column to be (deserialized) read from the small
+              // table's LazyBinary value row.
+
+              int smallTableValueIndex = -index - 1;
+
+              row[outputColumnNum++] = valueObjList.get(smallTableValueIndex);
+            }
+          }
+        }
+
+        // UNDONE: Do we need to copy the objects?
+        Object standardCopyRow =
+            ObjectInspectorUtils.copyToStandardObject(
+                row, outputObjInspector, ObjectInspectorCopyOption.WRITABLE);
+
+        internalForward(standardCopyRow, outputObjInspector);
+        nonMatchedValueCount++;
+      }
+
+      nonMatchedKeyCount++;
+    }
+  }
+
   @Override
   public void closeOp(boolean abort) throws HiveException {
+
+    // FUTURE: Currently, we only support FULL OUTER MapJoin for single condition MapJoins.
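+    // Emit the non-matched Small Table results first, before the spill handling below.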
+ if (isFullOuterMapJoin) { + generateFullOuterSmallTableNoMatches(); + } + boolean spilled = false; for (MapJoinTableContainer container : mapJoinTables) { if (container != null) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java index c28ef99..662f5c8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java @@ -242,6 +242,7 @@ public RowSchema getSchema() { // for output rows of this operator protected transient ObjectInspector outputObjInspector; + protected transient int auxiliaryChildIndex = -1; public void setId(String id) { this.id = id; @@ -914,51 +915,21 @@ protected long getNextCntr(long cntr) { protected void forward(Object row, ObjectInspector rowInspector) throws HiveException { - forward(row, rowInspector, false); - } - - protected void forward(VectorizedRowBatch vrg, ObjectInspector rowInspector) - throws HiveException { - forward(vrg, rowInspector, true); - } - - protected void forward(Object row, ObjectInspector rowInspector, boolean isVectorized) - throws HiveException { - if (isVectorized) { - vectorForward((VectorizedRowBatch) row, rowInspector); - } else { - baseForward(row, rowInspector); - } - } - - private void vectorForward(VectorizedRowBatch vrg, ObjectInspector rowInspector) - throws HiveException { - this.runTimeNumRows += vrg.count(); + runTimeNumRows++; if (getDone()) { return; } - // Data structures to store original values - final int size = vrg.size; - final boolean selectedInUse = vrg.selectedInUse; - final boolean saveState = (selectedInUse && multiChildren); - if (saveState) { - System.arraycopy(vrg.selected, 0, selected, 0, size); - } - int childrenDone = 0; for (int i = 0; i < childOperatorsArray.length; i++) { + if (i == auxiliaryChildIndex) { + continue; + } Operator o = childOperatorsArray[i]; if (o.getDone()) { childrenDone++; } else { - o.process(vrg, childOperatorsTag[i]); - // Restore original values - vrg.size = size; - vrg.selectedInUse = selectedInUse; - if (saveState) { - System.arraycopy(selected, 0, vrg.selected, 0, size); - } + o.process(row, childOperatorsTag[i]); } } @@ -968,27 +939,70 @@ private void vectorForward(VectorizedRowBatch vrg, ObjectInspector rowInspector) } } - private void baseForward(Object row, ObjectInspector rowInspector) + public void forwardAuxiliary(Object row, ObjectInspector rowInspector) throws HiveException { + + Operator auxiliaryChild = childOperatorsArray[auxiliaryChildIndex]; + if (auxiliaryChild.getDone()) { + return; + } + + auxiliaryChild.process(row, childOperatorsTag[auxiliaryChildIndex]); + } + + protected void vectorForward(VectorizedRowBatch batch) throws HiveException { - this.runTimeNumRows++; + + runTimeNumRows++; if (getDone()) { return; } - int childrenDone = 0; - for (int i = 0; i < childOperatorsArray.length; i++) { - Operator o = childOperatorsArray[i]; - if (o.getDone()) { - childrenDone++; - } else { - o.process(row, childOperatorsTag[i]); + // Data structures to store original values + final int size = batch.size; + final boolean selectedInUse = batch.selectedInUse; + final boolean saveState = (selectedInUse && multiChildren); + if (saveState) { + System.arraycopy(batch.selected, 0, selected, 0, size); + } + + final int childSize = childOperatorsArray.length; + if (childSize == 1) { + childOperatorsArray[0].process(batch, 0); + } else { + int childrenDone = 0; + for (int i = 0; i < childOperatorsArray.length; i++) { + if (i == 
auxiliaryChildIndex) { + continue; + } + Operator o = childOperatorsArray[i]; + if (o.getDone()) { + childrenDone++; + } else { + o.process(batch, 0); + + // Restore original values + batch.size = size; + batch.selectedInUse = selectedInUse; + if (saveState) { + System.arraycopy(selected, 0, batch.selected, 0, size); + } + } + } + // if all children are done, this operator is also done + if (childrenDone != 0 && childrenDone == childOperatorsArray.length) { + setDone(true); + } + } + } - // if all children are done, this operator is also done - if (childrenDone != 0 && childrenDone == childOperatorsArray.length) { - setDone(true); + public void vectorForwardAuxiliary(VectorizedRowBatch batch) throws HiveException { + + Operator auxiliaryChild = childOperatorsArray[auxiliaryChildIndex]; + if (auxiliaryChild.getDone()) { + return; } + + auxiliaryChild.process(batch, 0); } public void reset(){ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java index 0799181..ca04467 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java @@ -122,7 +122,11 @@ public void process(Object row, int tag) throws HiveException { if (conf != null && conf.isGatherStats()) { gatherStats(row); } - forward(row, inputObjInspectors[tag], vectorized); + if (vectorized) { + vectorForward((VectorizedRowBatch) row); + } else { + forward(row, inputObjInspectors[tag]); + } } private boolean checkSetDone(Object row, int tag) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java index add8bda..8d77b61 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/BytesBytesMultiHashMap.java @@ -466,16 +466,18 @@ public void put(KvSource kv, int keyHashCode) throws SerDeException { * @param key Key buffer. * @param offset the offset to the key in the buffer * @param hashMapResult The object to fill in that can read the values. + * @param matchTracker Optional object for tracking key matches. * @return The state byte. */ - public byte getValueResult(byte[] key, int offset, int length, Result hashMapResult) { + public byte getValueResult(byte[] key, int offset, int length, Result hashMapResult, + MatchTracker matchTracker) { hashMapResult.forget(); WriteBuffers.Position readPos = hashMapResult.getReadPos(); // First, find first record for the key. - long ref = findKeyRefToRead(key, offset, length, readPos); + long ref = findKeyRefToRead(key, offset, length, readPos, matchTracker); if (ref == 0) { return 0; } @@ -490,6 +492,12 @@ public byte getValueResult(byte[] key, int offset, int length, Result hashMapRes return Ref.getStateByte(ref); } + public void lookupKeyNoResult(byte[] key, int offset, int length, WriteBuffers.Position readPos, + MatchTracker matchTracker) { + + findKeyRefToRead(key, offset, length, readPos, matchTracker); + } + /** * Take the segment reference from {@link #getValueRefs(byte[], int, List)} * result and makes it self-contained - adds byte array where the value is stored, and @@ -500,6 +508,53 @@ public void populateValue(WriteBuffers.ByteSegmentRef valueRef) { } /** + * Finds the next non-matched key. + * @param currentSlotNum Start by specifying -1; thereafter, pass the index returned by the previous call.
+ * @param keyRef If the return value is not -1, a reference to the key bytes. + * @param hashMapResult If the return value is not -1, the key's values. + * @param matchTracker The object that tracks matches (non-shared). + * @return The current index of the non-matched key; or -1 if no more. + */ + public int findNextNonMatched(int currentSlotNum, WriteBuffers.ByteSegmentRef keyRef, + Result hashMapResult, MatchTracker matchTracker) { + currentSlotNum++; + + hashMapResult.forget(); + + WriteBuffers.Position readPos = hashMapResult.getReadPos(); + + while (true) { + if (currentSlotNum >= refs.length) { + + // No more. + return -1; + } + long ref = refs[currentSlotNum]; + if (ref != 0 && !matchTracker.wasMatched(currentSlotNum)) { + + // An unmatched key. + writeBuffers.setReadPoint(getFirstRecordLengthsOffset(ref, readPos), readPos); + int valueLength = (int) writeBuffers.readVLong(readPos); + int keyLength = (int) writeBuffers.readVLong(readPos); + long keyOffset = Ref.getOffset(ref) - (valueLength + keyLength); + + keyRef.reset(keyOffset, keyLength); + if (keyLength > 0) { + writeBuffers.populateValue(keyRef); + } + + boolean hasList = Ref.hasList(ref); + long offsetAfterListRecordKeyLen = hasList ? writeBuffers.getReadPoint(readPos) : 0; + + hashMapResult.set(this, Ref.getOffset(ref), hasList, offsetAfterListRecordKeyLen); + + return currentSlotNum; + } + currentSlotNum++; + } + } + + /** * Number of keys in the hashmap * @return number of keys */ @@ -516,8 +571,12 @@ public int getNumValues() { return numValues; } + public int getNumHashBuckets() { + return refs.length; + } + /** - * Number of bytes used by the hashmap + * Number of bytes used by the hashmap. * There are two main components that take most memory: writeBuffers and refs * Others include instance fields: 100 * @return number of bytes @@ -614,7 +673,7 @@ private int findKeySlotToWrite(long keyOffset, int keyLength, int hashCode) { * @return The ref to use for reading. */ private long findKeyRefToRead(byte[] key, int offset, int length, - WriteBuffers.Position readPos) { + WriteBuffers.Position readPos, MatchTracker matchTracker) { final int bucketMask = (refs.length - 1); int hashCode = writeBuffers.hashCode(key, offset, length); int slot = hashCode & bucketMask; @@ -629,6 +688,13 @@ private long findKeyRefToRead(byte[] key, int offset, int length, return 0; } if (isSameKey(key, offset, length, ref, hashCode, readPos)) { + + if (matchTracker != null) { + + // It will only update memory when not set. 
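The matchTracker.trackMatch(slot) call on the next line records the probe hit in a per-slot bitmap; the MatchTracker class added later in this patch packs one flag bit per logical hash bucket into a long[]. A minimal standalone sketch of just that bit arithmetic (class and method names here are illustrative, though the word/mask math mirrors the new class):

public class SlotBitmapSketch {

  private final long[] longMatchFlags;

  public SlotBitmapSketch(int logicalHashBucketCount) {
    // Round up to a whole number of 64-bit words, as the patch does.
    longMatchFlags = new long[(logicalHashBucketCount + Long.SIZE - 1) / Long.SIZE];
  }

  public void trackMatch(int slot) {
    // Set the bit for this slot: word index is slot / 64, bit index slot % 64.
    longMatchFlags[slot / Long.SIZE] |= 1L << (slot % Long.SIZE);
  }

  public boolean wasMatched(int slot) {
    return (longMatchFlags[slot / Long.SIZE] & (1L << (slot % Long.SIZE))) != 0;
  }

  public static void main(String[] args) {
    SlotBitmapSketch tracker = new SlotBitmapSketch(1024);
    tracker.trackMatch(700);
    System.out.println(tracker.wasMatched(700)); // true
    System.out.println(tracker.wasMatched(701)); // false
  }
}

Packing the flags this way costs one bit per hash bucket, so even a small table with a few million buckets adds only roughly a few hundred kilobytes of tracking state.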
+ matchTracker.trackMatch(slot); + } + return ref; } ++metricGetConflict; @@ -897,7 +963,7 @@ public void debugDumpTable() { dump.append(Utils.toStringBinary(key, 0, key.length)).append(" ref [").append(dumpRef(ref)) .append("]: "); Result hashMapResult = new Result(); - getValueResult(key, 0, key.length, hashMapResult); + getValueResult(key, 0, key.length, hashMapResult, null); List results = new ArrayList(); WriteBuffers.ByteSegmentRef byteSegmentRef = hashMapResult.first(); while (byteSegmentRef != null) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HashMapWrapper.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HashMapWrapper.java index 9d35805..74ff48f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HashMapWrapper.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HashMapWrapper.java @@ -32,6 +32,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.NonMatchedSmallTableIterator; import org.apache.hadoop.hive.ql.exec.vector.VectorHashKeyWrapper; import org.apache.hadoop.hive.ql.exec.vector.VectorHashKeyWrapperBatch; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter; @@ -113,6 +114,7 @@ public void put(MapJoinKey key, MapJoinRowContainer value) { public int size() { return mHash.size(); } + @Override public Set> entrySet() { return mHash.entrySet(); @@ -141,6 +143,12 @@ public ReusableGetAdaptor createGetter(MapJoinKey keyTypeFromLoader) { } @Override + public NonMatchedSmallTableIterator createNonMatchedSmallTableIterator( + MatchTracker matchTracker) { + throw new RuntimeException("Not applicable"); + } + + @Override public long getEstimatedMemorySize() { // TODO: Key and Values are Object[] which can be eagerly deserialized or lazily deserialized. 
To accurately // estimate the entry size, every possible Objects in Key, Value should implement MemoryEstimate interface which @@ -188,6 +196,22 @@ public GetAdaptor(MapJoinKey key) { } @Override + public JoinUtil.JoinResult setFromVectorNoNulls(VectorHashKeyWrapper kw, + VectorExpressionWriter[] keyOutputWriters, VectorHashKeyWrapperBatch keyWrapperBatch, + MatchTracker matchTracker) + throws HiveException { + throw new RuntimeException("Not supported"); + } + + @Override + public void setFromVectorNoResult(VectorHashKeyWrapper kw, + VectorExpressionWriter[] keyOutputWriters, VectorHashKeyWrapperBatch keyWrapperBatch, + MatchTracker matchTracker) + throws HiveException { + throw new RuntimeException("Not supported"); + } + + @Override public JoinUtil.JoinResult setFromRow(Object row, List fields, List ois) throws HiveException { if (currentKey == null) { @@ -208,6 +232,18 @@ public GetAdaptor(MapJoinKey key) { } @Override + public JoinUtil.JoinResult setFromRowNoNulls(Object row, List fields, + List ois, MatchTracker matchTracker) throws HiveException { + throw new RuntimeException("Not supported"); + } + + @Override + public void setFromRowNoResult(Object row, List fields, + List ois, MatchTracker matchTracker) throws HiveException { + throw new RuntimeException("Not supported"); + } + + @Override public JoinUtil.JoinResult setFromOther(ReusableGetAdaptor other) { assert other instanceof GetAdaptor; GetAdaptor other2 = (GetAdaptor)other; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java index 027e39a..ae1a49b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HybridHashTableContainer.java @@ -39,6 +39,7 @@ import org.apache.hadoop.hive.ql.exec.JoinUtil.JoinResult; import org.apache.hadoop.hive.ql.exec.SerializationUtilities; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer.KeyValueHelper; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.NonMatchedSmallTableIterator; import org.apache.hadoop.hive.ql.exec.vector.VectorHashKeyWrapper; import org.apache.hadoop.hive.ql.exec.vector.VectorHashKeyWrapperBatch; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter; @@ -776,6 +777,12 @@ public ReusableGetAdaptor createGetter(MapJoinKey keyTypeFromLoader) { } @Override + public NonMatchedSmallTableIterator createNonMatchedSmallTableIterator( + MatchTracker matchTracker) { + throw new RuntimeException("Not implemented"); + } + + @Override public void seal() { for (HashPartition hp : hashPartitions) { // Only seal those partitions that haven't been spilled and cleared, @@ -835,6 +842,22 @@ public GetAdaptor() { } @Override + public JoinUtil.JoinResult setFromVectorNoNulls(VectorHashKeyWrapper kw, + VectorExpressionWriter[] keyOutputWriters, VectorHashKeyWrapperBatch keyWrapperBatch, + MatchTracker matchTracker) + throws HiveException { + throw new RuntimeException("Not supported"); + } + + @Override + public void setFromVectorNoResult(VectorHashKeyWrapper kw, + VectorExpressionWriter[] keyOutputWriters, VectorHashKeyWrapperBatch keyWrapperBatch, + MatchTracker matchTracker) + throws HiveException { + throw new RuntimeException("Not supported"); + } + + @Override public JoinUtil.JoinResult setFromRow(Object row, List fields, List ois) throws HiveException { if (nulls == null) { @@ -851,6 +874,18 @@
public GetAdaptor() { } @Override + public JoinUtil.JoinResult setFromRowNoNulls(Object row, List fields, + List ois, MatchTracker matchTracker) throws HiveException { + throw new RuntimeException("Not supported"); + } + + @Override + public void setFromRowNoResult(Object row, List fields, + List ois, MatchTracker matchTracker) throws HiveException { + throw new RuntimeException("Not supported"); + } + + @Override public JoinUtil.JoinResult setFromOther(ReusableGetAdaptor other) throws HiveException { assert other instanceof GetAdaptor; GetAdaptor other2 = (GetAdaptor)other; @@ -884,7 +919,9 @@ public MapJoinRowContainer getCurrentRows() { @Override public JoinUtil.JoinResult setDirect(byte[] bytes, int offset, int length, - BytesBytesMultiHashMap.Result hashMapResult) { + BytesBytesMultiHashMap.Result hashMapResult, MatchTracker matchTracker) { + + // UNDONE: matchTracker return currentValue.setDirect(bytes, offset, length, hashMapResult); } @@ -892,6 +929,17 @@ public MapJoinRowContainer getCurrentRows() { public int directSpillPartitionId() { return currentValue.directSpillPartitionId(); } + + @Override + public void setDirectNoResult(byte[] bytes, int offset, int length, + WriteBuffers.Position readPos, MatchTracker matchTracker) { + throw new RuntimeException("Not implemented"); + } + + @Override + public MatchTracker createMatchTracker() { + throw new RuntimeException("Not implemented"); + } } /** Row container that gets and deserializes the rows on demand from bytes provided. */ @@ -966,7 +1014,7 @@ public ReusableRowContainer() { } else { aliasFilter = hashPartitions[partitionId].hashMap.getValueResult(output.getData(), 0, - output.getLength(), hashMapResult); + output.getLength(), hashMapResult, null); dummyRow = null; if (hashMapResult.hasRows()) { return JoinUtil.JoinResult.MATCH; @@ -977,6 +1025,10 @@ public ReusableRowContainer() { } } + public void reset() { + hashMapResult.forget(); + } + @Override public boolean hasRows() { return hashMapResult.hasRows() || (dummyRow != null); @@ -1116,7 +1168,7 @@ public void write(MapJoinObjectSerDeContext valueContext, ObjectOutputStream out } else { aliasFilter = hashPartitions[partitionId].hashMap.getValueResult(bytes, offset, length, - hashMapResult); + hashMapResult, null); dummyRow = null; if (hashMapResult.hasRows()) { return JoinUtil.JoinResult.MATCH; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinBytesTableContainer.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinBytesTableContainer.java index 033bbdb..9670038 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinBytesTableContainer.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinBytesTableContainer.java @@ -30,6 +30,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.NonMatchedSmallTableIterator; import org.apache.hadoop.hive.ql.exec.vector.VectorHashKeyWrapper; import org.apache.hadoop.hive.ql.exec.vector.VectorHashKeyWrapperBatch; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter; @@ -40,6 +41,7 @@ import org.apache.hadoop.hive.serde2.AbstractSerDe; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.WriteBuffers; +import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; import org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe; 
import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef; @@ -88,6 +90,7 @@ * compare the large table keys correctly when we do, we need to serialize them with correct * ordering. Hence, remember the ordering here; it is null if we do use LazyBinarySerDe. */ + private AbstractSerDe keySerde; private boolean[] sortableSortOrders; private byte[] nullMarkers; private byte[] notNullMarkers; @@ -336,9 +339,17 @@ public void setKeyValue(Writable key, Writable val) { @Override public byte updateStateByte(Byte previousValue) { - if (filterGetter == null) return (byte)0xff; + if (!hasTag || filterGetter == null) { + return (byte) 0xff; + } byte aliasFilter = (previousValue == null) ? (byte)0xff : previousValue.byteValue(); - filterGetter.init((BinaryComparable)value); + BinaryComparable binaryComparableValue = (BinaryComparable) value; + if (binaryComparableValue.getLength() == 0) { + + // Skip empty values just like MapJoinEagerRowContainer.read does. + return (byte) 0xff; + } + filterGetter.init(binaryComparableValue); aliasFilter &= filterGetter.getShort(); return aliasFilter; } @@ -407,7 +418,8 @@ public long getEstimatedMemorySize() { @Override public void setSerde(MapJoinObjectSerDeContext keyContext, MapJoinObjectSerDeContext valueContext) throws SerDeException { - AbstractSerDe keySerde = keyContext.getSerDe(), valSerde = valueContext.getSerDe(); + keySerde = keyContext.getSerDe(); + AbstractSerDe valSerde = valueContext.getSerDe(); if (writeHelper == null) { LOG.info("Initializing container with " + keySerde.getClass().getName() + " and " + valSerde.getClass().getName()); @@ -456,6 +468,12 @@ public ReusableGetAdaptor createGetter(MapJoinKey keyTypeFromLoader) { } @Override + public NonMatchedSmallTableIterator createNonMatchedSmallTableIterator( + MatchTracker matchTracker) { + return new NonMatchedSmallTableIteratorImpl(matchTracker); + } + + @Override public void seal() { hashMap.seal(); } @@ -542,6 +560,64 @@ public GetAdaptor() { } @Override + public JoinUtil.JoinResult setFromVectorNoNulls(VectorHashKeyWrapper kw, + VectorExpressionWriter[] keyOutputWriters, VectorHashKeyWrapperBatch keyWrapperBatch, + MatchTracker matchTracker) + throws HiveException { + if (nulls == null) { + nulls = new boolean[keyOutputWriters.length]; + currentKey = new Object[keyOutputWriters.length]; + vectorKeyOIs = new ArrayList(); + for (int i = 0; i < keyOutputWriters.length; i++) { + vectorKeyOIs.add(keyOutputWriters[i].getObjectInspector()); + } + } else { + assert nulls.length == keyOutputWriters.length; + } + boolean hasNulls = false; + for (int i = 0; i < keyOutputWriters.length; i++) { + currentKey[i] = keyWrapperBatch.getWritableKeyValue(kw, i, keyOutputWriters[i]); + if (currentKey[i] == null) { + nulls[i] = true; + hasNulls = true; + } else { + nulls[i] = false; + } + } + if (hasNulls) { + currentValue.reset(); + return JoinUtil.JoinResult.NOMATCH; + } + return currentValue.setFromOutput( + MapJoinKey.serializeRow(output, currentKey, vectorKeyOIs, + sortableSortOrders, nullMarkers, notNullMarkers), matchTracker); + } + + @Override + public void setFromVectorNoResult(VectorHashKeyWrapper kw, + VectorExpressionWriter[] keyOutputWriters, VectorHashKeyWrapperBatch keyWrapperBatch, + MatchTracker matchTracker) + throws HiveException { + if (nulls == null) { + nulls = new boolean[keyOutputWriters.length]; + currentKey = new Object[keyOutputWriters.length]; + vectorKeyOIs = new ArrayList(); + for (int i = 0; i < keyOutputWriters.length; i++) { + 
vectorKeyOIs.add(keyOutputWriters[i].getObjectInspector()); + } + } else { + assert nulls.length == keyOutputWriters.length; + } + for (int i = 0; i < keyOutputWriters.length; i++) { + currentKey[i] = keyWrapperBatch.getWritableKeyValue(kw, i, keyOutputWriters[i]); + nulls[i] = currentKey[i] == null; + } + currentValue.setFromOutputNoResult( + MapJoinKey.serializeRow(output, currentKey, vectorKeyOIs, + sortableSortOrders, nullMarkers, notNullMarkers), matchTracker); + } + + @Override public JoinUtil.JoinResult setFromRow(Object row, List fields, List ois) throws HiveException { if (nulls == null) { @@ -558,6 +634,48 @@ public GetAdaptor() { } @Override + public JoinUtil.JoinResult setFromRowNoNulls(Object row, List fields, + List ois, MatchTracker matchTracker) throws HiveException { + if (nulls == null) { + nulls = new boolean[fields.size()]; + currentKey = new Object[fields.size()]; + } + boolean hasNulls = false; + for (int keyIndex = 0; keyIndex < fields.size(); ++keyIndex) { + currentKey[keyIndex] = fields.get(keyIndex).evaluate(row); + if (currentKey[keyIndex] == null) { + nulls[keyIndex] = true; + hasNulls = true; + } else { + nulls[keyIndex] = false; + } + } + if (hasNulls) { + currentValue.reset(); + return JoinUtil.JoinResult.NOMATCH; + } + return currentValue.setFromOutput( + MapJoinKey.serializeRow(output, currentKey, ois, + sortableSortOrders, nullMarkers, notNullMarkers), matchTracker); + } + + @Override + public void setFromRowNoResult(Object row, List fields, + List ois, MatchTracker matchTracker) throws HiveException { + if (nulls == null) { + nulls = new boolean[fields.size()]; + currentKey = new Object[fields.size()]; + } + for (int keyIndex = 0; keyIndex < fields.size(); ++keyIndex) { + currentKey[keyIndex] = fields.get(keyIndex).evaluate(row); + nulls[keyIndex] = currentKey[keyIndex] == null; + } + currentValue.setFromOutputNoResult( + MapJoinKey.serializeRow(output, currentKey, ois, + sortableSortOrders, nullMarkers, notNullMarkers), matchTracker); + } + + @Override public JoinUtil.JoinResult setFromOther(ReusableGetAdaptor other) { assert other instanceof GetAdaptor; GetAdaptor other2 = (GetAdaptor)other; @@ -591,14 +709,26 @@ public MapJoinRowContainer getCurrentRows() { @Override public JoinUtil.JoinResult setDirect(byte[] bytes, int offset, int length, - BytesBytesMultiHashMap.Result hashMapResult) { - return currentValue.setDirect(bytes, offset, length, hashMapResult); + BytesBytesMultiHashMap.Result hashMapResult, MatchTracker matchTracker) { + return currentValue.setDirect( + bytes, offset, length, hashMapResult, matchTracker); } @Override public int directSpillPartitionId() { throw new UnsupportedOperationException("Getting the spill hash partition not supported"); } + + @Override + public void setDirectNoResult(byte[] bytes, int offset, int length, + WriteBuffers.Position readPos, MatchTracker matchTracker) { + currentValue.setDirectNoResult(bytes, offset, length, readPos, matchTracker); + } + + @Override + public MatchTracker createMatchTracker() { + return new MatchTracker(hashMap.getNumHashBuckets()); + } } /** Row container that gets and deserializes the rows on demand from bytes provided. 
*/ @@ -619,6 +749,7 @@ public int directSpillPartitionId() { private final LazyBinaryStruct valueStruct; private final boolean needsComplexObjectFixup; private final ArrayList complexObjectArrayBuffer; + private final WriteBuffers.Position noResultReadPos; public ReusableRowContainer() { if (internalValueOi != null) { @@ -639,13 +770,31 @@ public ReusableRowContainer() { } uselessIndirection = new ByteArrayRef(); hashMapResult = new BytesBytesMultiHashMap.Result(); + noResultReadPos = new WriteBuffers.Position(); clearRows(); } + public BytesBytesMultiHashMap.Result getHashMapResult() { + return hashMapResult; + } + public JoinUtil.JoinResult setFromOutput(Output output) { aliasFilter = hashMap.getValueResult( - output.getData(), 0, output.getLength(), hashMapResult); + output.getData(), 0, output.getLength(), hashMapResult, /* matchTracker */ null); + dummyRow = null; + if (hashMapResult.hasRows()) { + return JoinUtil.JoinResult.MATCH; + } else { + aliasFilter = (byte) 0xff; + return JoinUtil.JoinResult.NOMATCH; + } + } + + public JoinUtil.JoinResult setFromOutput(Output output, MatchTracker matchTracker) { + + aliasFilter = hashMap.getValueResult( + output.getData(), 0, output.getLength(), hashMapResult, matchTracker); dummyRow = null; if (hashMapResult.hasRows()) { return JoinUtil.JoinResult.MATCH; @@ -653,8 +802,16 @@ public ReusableRowContainer() { aliasFilter = (byte) 0xff; return JoinUtil.JoinResult.NOMATCH; } + } - } + public void setFromOutputNoResult(Output output, MatchTracker matchTracker) { + hashMap.lookupKeyNoResult( + output.getData(), 0, output.getLength(), noResultReadPos, matchTracker); + } + + public void reset() { + hashMapResult.forget(); + } @Override public boolean hasRows() { @@ -773,8 +930,8 @@ public void write(MapJoinObjectSerDeContext valueContext, ObjectOutputStream out // Direct access. public JoinUtil.JoinResult setDirect(byte[] bytes, int offset, int length, - BytesBytesMultiHashMap.Result hashMapResult) { - aliasFilter = hashMap.getValueResult(bytes, offset, length, hashMapResult); + BytesBytesMultiHashMap.Result hashMapResult, MatchTracker matchTracker) { + aliasFilter = hashMap.getValueResult(bytes, offset, length, hashMapResult, matchTracker); dummyRow = null; if (hashMapResult.hasRows()) { return JoinUtil.JoinResult.MATCH; @@ -783,6 +940,70 @@ public void write(MapJoinObjectSerDeContext valueContext, ObjectOutputStream out return JoinUtil.JoinResult.NOMATCH; } } + + public void setDirectNoResult(byte[] bytes, int offset, int length, + WriteBuffers.Position readPos, MatchTracker matchTracker) { + hashMap.lookupKeyNoResult(bytes, offset, length, readPos, matchTracker); + } + } + + private class NonMatchedSmallTableIteratorImpl implements NonMatchedSmallTableIterator { + + private final MatchTracker matchTracker; + + private int currentIndex; + + private final WriteBuffers.ByteSegmentRef keyRef; + private final BytesWritable bytesWritable; + private final ReusableRowContainer currentValue; + + NonMatchedSmallTableIteratorImpl(MatchTracker matchTracker) { + this.matchTracker = matchTracker; + + currentIndex = -1; + + keyRef = new WriteBuffers.ByteSegmentRef(); + bytesWritable = new BytesWritable(); + + currentValue = new ReusableRowContainer(); + } + + @Override + public boolean isNext() { + + // If another non-matched key is found, the key bytes will be referenced by keyRef, and + // our ReusableRowContainer's BytesBytesMultiHashMap.Result will reference the value rows. 
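Before the method body resumes below, here is the shape of a caller driving this contract end to end. This is a sketch that compiles only against the interfaces this patch introduces; countNonMatched is a hypothetical helper, and the per-key row handling is elided:

import java.util.List;

import org.apache.hadoop.hive.ql.exec.persistence.MapJoinRowContainer;
import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer;
import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.NonMatchedSmallTableIterator;
import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker;
import org.apache.hadoop.hive.ql.metadata.HiveException;

class NonMatchedDrainSketch {

  // Counts the never-matched small-table keys after the probe phase;
  // a real caller would form and forward NULL-extended rows instead.
  static int countNonMatched(MapJoinTableContainer table, MatchTracker matchTracker)
      throws HiveException {
    NonMatchedSmallTableIterator it =
        table.createNonMatchedSmallTableIterator(matchTracker);
    int nonMatchedKeyCount = 0;
    while (it.isNext()) {
      List keyObjList = it.getCurrentKey();             // deserialized key columns
      MapJoinRowContainer values = it.getCurrentRows(); // value rows for that key
      nonMatchedKeyCount++;
    }
    return nonMatchedKeyCount;
  }
}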
+ currentIndex = + hashMap.findNextNonMatched( + currentIndex, keyRef, currentValue.getHashMapResult(), matchTracker); + return (currentIndex != -1); + } + + @Override + public List getCurrentKey() throws HiveException { + List deserializedList = + MapJoinKey.deserializeRow( + keyRef.getBytes(), + (int) keyRef.getOffset(), + keyRef.getLength(), + bytesWritable, keySerde); + return deserializedList; + } + + @Override + public ByteSegmentRef getCurrentKeyAsRef() { + return keyRef; + } + + @Override + public MapJoinRowContainer getCurrentRows() { + return currentValue; + } + + @Override + public BytesBytesMultiHashMap.Result getHashMapResult() { + return currentValue.getHashMapResult(); + } } public static boolean isSupportedKey(ObjectInspector keyOi) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java index 6504a5f..ac91187 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinKey.java @@ -40,6 +40,7 @@ import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; +import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Writable; /** @@ -171,4 +172,18 @@ public static Output serializeRow(Output byteStream, Object[] fieldData, } return byteStream; } + + /** + * Deserializes a key. + * @param bytesWritable BytesWritable to reuse. + */ + public static List deserializeRow(byte[] keyBytes, int keyOffset, int keyLength, + BytesWritable bytesWritable, AbstractSerDe serde) throws HiveException { + try { + bytesWritable.set(keyBytes, keyOffset, keyLength); + return (List) serde.deserialize(bytesWritable); + } catch (SerDeException e) { + throw new HiveException("Deserialization error", e); + } + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectSerDeContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectSerDeContext.java index 345d1f4..1d7aec8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectSerDeContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectSerDeContext.java @@ -17,11 +17,16 @@ */ package org.apache.hadoop.hive.ql.exec.persistence; +import java.util.ArrayList; +import java.util.List; + import org.apache.hadoop.hive.serde2.AbstractSerDe; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; +import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructField; @SuppressWarnings("deprecation") public class MapJoinObjectSerDeContext { @@ -55,6 +60,18 @@ public boolean hasFilterTag() { return hasFilter; } + public String stringify() { + StandardStructObjectInspector standardStructOI = (StandardStructObjectInspector) standardOI; + List structFields = standardStructOI.getAllStructFieldRefs(); + List typeInfoStrings = new ArrayList(); + for (StructField field : structFields) { + ObjectInspector fieldOI = field.getFieldObjectInspector(); + typeInfoStrings.add(fieldOI.getTypeName()); + } + return "[types " + typeInfoStrings.toString() + ", serde=" +
serde.getClass().getName() + + ", hasFilter=" + hasFilter + "]"; + } + @Override public String toString() { return "MapJoinObjectSerDeContext [standardOI=" + standardOI + ", serde=" + serde diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinTableContainer.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinTableContainer.java index b0c7574..307852d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinTableContainer.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinTableContainer.java @@ -29,6 +29,7 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.io.Writable; @@ -46,6 +47,12 @@ JoinUtil.JoinResult setFromVector(VectorHashKeyWrapper kw, VectorExpressionWriter[] keyOutputWriters, VectorHashKeyWrapperBatch keyWrapperBatch) throws HiveException; + JoinUtil.JoinResult setFromVectorNoNulls(VectorHashKeyWrapper kw, VectorExpressionWriter[] keyOutputWriters, + VectorHashKeyWrapperBatch keyWrapperBatch, MatchTracker matchTracker) throws HiveException; + + void setFromVectorNoResult(VectorHashKeyWrapper kw, VectorExpressionWriter[] keyOutputWriters, + VectorHashKeyWrapperBatch keyWrapperBatch, MatchTracker matchTracker) throws HiveException; + /** * Changes current rows to which adaptor is referring to the rows corresponding to * the key represented by a row object, and fields and ois used to interpret it. @@ -53,6 +60,15 @@ JoinUtil.JoinResult setFromRow(Object row, List fields, List ois) throws HiveException; + // Version with MatchTracker object. + JoinUtil.JoinResult setFromRowNoNulls(Object row, List fields, + List ois, MatchTracker matchTracker) + throws HiveException; + + void setFromRowNoResult(Object row, List fields, + List ois, MatchTracker matchTracker) + throws HiveException; + /** * Changes current rows to which adaptor is referring to the rows corresponding to * the key that another adaptor has already deserialized via setFromVector/setFromRow. @@ -82,6 +98,41 @@ MapJoinKey putRow(Writable currentKey, Writable currentValue) throws SerDeException, HiveException, IOException; /** + * Iterates through the Small Table hash table and returns the key and value rows for any + * non-matched keys. + */ + public interface NonMatchedSmallTableIterator { + /** + * Advances to the next non-matched key; returns true if one was found. + */ + boolean isNext(); + + /** + * @return The current key as a deserialized object array after an isNext() call + * that returned true. + * @throws HiveException + */ + List getCurrentKey() throws HiveException; + + /** + * @return The current key as a WriteBuffers.ByteSegmentRef after an isNext() call + * that returned true. + */ + ByteSegmentRef getCurrentKeyAsRef(); + + /** + * @return The container with the value rows for the current key after an isNext() call + * that returned true. + */ + MapJoinRowContainer getCurrentRows(); + + /** + * @return The value rows as a BytesBytesMultiHashMap.Result. + */ + BytesBytesMultiHashMap.Result getHashMapResult(); + } + + /** * Indicates to the container that the puts have ended; table is now r/o.
*/ void seal(); @@ -94,6 +145,12 @@ MapJoinKey putRow(Writable currentKey, Writable currentValue) */ ReusableGetAdaptor createGetter(MapJoinKey keyTypeFromLoader); + /** + * Creates an iterator that goes through the hash table and returns the key and value rows for any + * non-matched keys. + */ + NonMatchedSmallTableIterator createNonMatchedSmallTableIterator(MatchTracker matchTracker); + /** Clears the contents of the table. */ void clear(); @@ -108,7 +165,7 @@ MapJoinKey putRow(Writable currentKey, Writable currentValue) boolean hasSpill(); /** - * Return the size of the hash table + * Return the size of the hash table. */ int size(); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MatchTracker.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MatchTracker.java new file mode 100644 index 0000000..ff144ac --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MatchTracker.java @@ -0,0 +1,71 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.exec.persistence; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/* + * Record which slot table entries had key matches for FULL OUTER MapJoin. + */ +public class MatchTracker { + private static final Logger LOG = LoggerFactory.getLogger(MatchTracker.class); + + private final int logicalHashBucketCount; + private long[] longMatchFlags; + + public MatchTracker(int logicalHashBucketCount) { + this.logicalHashBucketCount = logicalHashBucketCount; + + final int longMatchFlagsSize = (logicalHashBucketCount + Long.SIZE - 1) / Long.SIZE; + longMatchFlags = new long[longMatchFlagsSize]; + } + + protected boolean isFirstMatch; + + public boolean getIsFirstMatch() { + return isFirstMatch; + } + + /* + * Track another match. + * Afterwards, getIsFirstMatch() reports whether this was the slot's first match. + */ + public void trackMatch(int logicalSlotNum) { + + final int longWordIndex = logicalSlotNum / Long.SIZE; + final long longBitMask = 1L << (logicalSlotNum % Long.SIZE); + if ((longMatchFlags[longWordIndex] & longBitMask) != 0) { + + // Flag is already on. + isFirstMatch = false; + } else { + longMatchFlags[longWordIndex] |= longBitMask; + isFirstMatch = true; + } + } + + /* + * @return Returns true if the slot key was matched.
+ */ + public boolean wasMatched(int logicalSlotNum) { + final int longWordIndex = logicalSlotNum / Long.SIZE; + final long longBitMask = 1L << (logicalSlotNum % Long.SIZE); + return (longMatchFlags[longWordIndex] & longBitMask) != 0; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/ReusableGetAdaptorDirectAccess.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/ReusableGetAdaptorDirectAccess.java index 3303cc4..d22740d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/ReusableGetAdaptorDirectAccess.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/ReusableGetAdaptorDirectAccess.java @@ -20,11 +20,16 @@ import org.apache.hadoop.hive.ql.exec.JoinUtil.JoinResult; +import org.apache.hadoop.hive.serde2.WriteBuffers; public interface ReusableGetAdaptorDirectAccess { JoinResult setDirect(byte[] bytes, int offset, int length, - BytesBytesMultiHashMap.Result hashMapResult); + BytesBytesMultiHashMap.Result hashMapResult, MatchTracker matchTracker); int directSpillPartitionId(); + void setDirectNoResult(byte[] bytes, int offset, int length, + WriteBuffers.Position readPos, MatchTracker matchTracker); + + MatchTracker createMatchTracker(); } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/UnwrapRowContainer.java ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/UnwrapRowContainer.java index 95400c8..0ff54ff 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/UnwrapRowContainer.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/UnwrapRowContainer.java @@ -26,6 +26,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.io.ShortWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; /** @@ -70,6 +71,8 @@ public MapJoinRowContainer setInternal(MapJoinRowContainer internal, Object[] cu return unwrap(iterator.next()); } + private static final ShortWritable ALL_ALIAS_FILTER_SHORT_WRITABLE = new ShortWritable((byte) 0xff); + private List unwrap(List values) { if (values == null) { return null; @@ -90,7 +93,14 @@ public MapJoinRowContainer setInternal(MapJoinRowContainer internal, Object[] cu } } if (tagged) { - unwrapped.add(values.get(values.size() - 1)); // append filter tag + + // Append filter tag. 
+ final int size = values.size(); + if (size == 0) { + unwrapped.add(ALL_ALIAS_FILTER_SHORT_WRITABLE); + } else { + unwrapped.add(values.get(size - 1)); + } } return unwrapped; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java index 2cccb44..152dc98 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/tez/ReduceRecordProcessor.java @@ -262,7 +262,7 @@ public DynamicValueRegistryTez call() { e.getMessage()); throw (InterruptedException) e; } else { - throw new RuntimeException("Reduce operator initialization failed", e); + throw new RuntimeException(redWork.getName() + " operator initialization failed", e); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAppMasterEventOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAppMasterEventOperator.java index c4503ad..f2400b8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAppMasterEventOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAppMasterEventOperator.java @@ -136,7 +136,7 @@ public void process(Object data, int tag) throws HiveException { throw new HiveException(e); } - forward(data, rowInspector, true); + forward(data, rowInspector); } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java index e96619c..9615869 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorAssignRow.java @@ -20,6 +20,7 @@ import java.sql.Date; import java.sql.Timestamp; +import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -985,6 +986,17 @@ public void assignRow(VectorizedRowBatch batch, int batchIndex, Object[] objects } } + public void assignRow(VectorizedRowBatch batch, int batchIndex, ArrayList objectList) { + final int count = isConvert.length; + for (int i = 0; i < count; i++) { + if (isConvert[i]) { + assignConvertRowColumn(batch, batchIndex, i, objectList.get(i)); + } else { + assignRowColumn(batch, batchIndex, i, objectList.get(i)); + } + } + } + /* * Assign a row from a list of standard objects up to a count */ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java index bedc12a..0cf8491 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorCopyRow.java @@ -259,14 +259,27 @@ void copy(VectorizedRowBatch inBatch, int inBatchIndex, VectorizedRowBatch outBa private CopyRow[] subRowToBatchCopiersByReference; public void init(VectorColumnMapping columnMapping) throws HiveException { - int count = columnMapping.getCount(); + init( + columnMapping.getInputColumns(), + columnMapping.getOutputColumns(), + columnMapping.getTypeInfos()); + } + + public void init(int[] columnMap, TypeInfo[] typeInfos) throws HiveException { + init(columnMap, columnMap, typeInfos); + } + + public void init(int[] inputColumnMap, int[] outputColumnMap, TypeInfo[] typeInfos) + throws HiveException { + + final int count = inputColumnMap.length; subRowToBatchCopiersByValue = new CopyRow[count]; subRowToBatchCopiersByReference = new CopyRow[count]; for (int i = 0; i < count; i++) { - int inputColumn = 
columnMapping.getInputColumns()[i]; - int outputColumn = columnMapping.getOutputColumns()[i]; - TypeInfo typeInfo = columnMapping.getTypeInfos()[i]; + int inputColumn = inputColumnMap[i]; + int outputColumn = outputColumnMap[i]; + TypeInfo typeInfo = typeInfos[i]; Type columnVectorType = VectorizationContext.getColumnVectorTypeFromTypeInfo(typeInfo); CopyRow copyRowByValue = null; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java index 8ea625e..c9927d3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorDeserializeRow.java @@ -444,6 +444,38 @@ public void init(boolean[] columnsToIncludeTruncated) throws HiveException { } + public void init(int[] outputColumns, boolean[] columnsToInclude) throws HiveException { + + Preconditions.checkState( + outputColumns.length == columnsToInclude.length); + + final int columnCount = sourceTypeInfos.length; + allocateArrays(columnCount); + + int includedCount = 0; + final int[] includedIndices = new int[columnCount]; + + for (int i = 0; i < columnCount; i++) { + + if (!columnsToInclude[i]) { + + // Field not included in query. + + } else { + + initTopLevelField(i, outputColumns[i], sourceTypeInfos[i], dataTypePhysicalVariations[i]); + includedIndices[includedCount++] = i; + } + } + + // Optimizing for readField? + if (includedCount < columnCount && deserializeRead.isReadFieldSupported()) { + useReadField = true; + readFieldLogicalIndices = Arrays.copyOf(includedIndices, includedCount); + } + + } + /** * Initialize for converting the source data type that are going to be read with the * DeserializedRead interface passed to the constructor to the target data types desired in diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java index 14ac8ee..73965ad 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorFilterOperator.java @@ -133,7 +133,7 @@ public void process(Object row, int tag) throws HiveException { // All are selected, do nothing } if (vrg.size > 0) { - forward(vrg, null, true); + vectorForward(vrg); } // Restore the original selected vector diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java index 75efc29..20af960 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorGroupByOperator.java @@ -1170,7 +1170,7 @@ private void writeGroupRow(VectorAggregationBufferRow agg, DataOutputBuffer buff } private void flushOutput() throws HiveException { - forward(outputBatch, null, true); + vectorForward(outputBatch); outputBatch.reset(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java index 051d338..7edb059 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorLimitOperator.java @@ -88,7 +88,7 @@ public void process(Object row, int tag) throws HiveException { batch.selected[i] = batch.selected[skipSize + i]; } } - forward(row, inputObjInspectors[tag], 
true); + vectorForward(batch); currCount += batch.size; } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinBaseOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinBaseOperator.java index 497b12d..c09b645 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinBaseOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinBaseOperator.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.HashMap; @@ -25,6 +26,7 @@ import java.util.Map; import java.util.concurrent.Future; +import org.apache.commons.lang.ArrayUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.conf.Configuration; @@ -34,6 +36,7 @@ import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; import org.apache.hadoop.hive.ql.exec.persistence.ObjectContainer; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; @@ -110,6 +113,221 @@ public VectorizationContext getInputVectorizationContext() { return vContext; } + /* + * RESTRICTION: + * No MapJoin key or value expressions other than columns (i.e. ExprNodeColumnDesc). Big Table + * key and value columns can be easily determined. + * + * Big Table input maps: + * // Takes some row input and tells us which are key columns and which are value columns. + * Which input columns are the key columns. + * Which input columns are the value columns. + * // E.g. [0, 2, 10] is ctinyint (type: tinyint), cint (type: int), cboolean1 (type: boolean) + * // Input column names are _col0, _col1, _col2 by SELECT + * // so 0, 2 are the keys + * // where 1 is the value + * + * Big Table retain are input Big Table column numbers kept in the output (in output order). + * + * Big Table output mapping: + * + * // When Big Table output result starts at 0, then: + * // keys are [0, 2] + * // value is [1] + * // Needed to map Map Join output result keys and values to key and value expressions + * // that represent the Big Table input row so Auxiliary RS can be created. + * // If other order, then would start at smallTableResultSize offset. + * + * How to rename Auxiliary RS output (which is Big Table input) to _colN form? + * KEY.reducesinkkey 0 .. 
K - 1 are _outN where N is key map [keyNum] + * VALUE._outN are _outN where N is value map [valueNum] + + */ + public static class MapJoinBigTableInfo { + + private final int[] inputKeyColumnMap; + private final String[] inputKeyColumnNames; + private final int[] inputValueColumnMap; + private final String[] inputValueColumnNames; + + private final int[] outputKeyColumnMap; + private final int[] outputValueColumnMap; + + private boolean isBigTableFirst; + private final int bigTableResultSize; + private final int smallTableResultSize; + + public MapJoinBigTableInfo( + int[] inputKeyColumnMap, + String[] inputKeyColumnNames, + int[] inputValueColumnMap, + String[] inputValueColumnNames, + int[] outputKeyColumnMap, + int[] outputValueColumnMap, + boolean isBigTableFirst, + int bigTableResultSize, + int smallTableResultSize) { + this.inputKeyColumnMap = inputKeyColumnMap; + this.inputKeyColumnNames = inputKeyColumnNames; + this.inputValueColumnMap = inputValueColumnMap; + this.inputValueColumnNames = inputValueColumnNames; + + this.outputKeyColumnMap = outputKeyColumnMap; + this.outputValueColumnMap = outputValueColumnMap; + + this.isBigTableFirst = isBigTableFirst; + this.bigTableResultSize = bigTableResultSize; + this.smallTableResultSize = smallTableResultSize; + } + + public int[] getInputKeyColumnMap() { + return inputKeyColumnMap; + } + public String[] getInputKeyColumnNames() { + return inputKeyColumnNames; + } + public int[] getInputValueColumnMap() { + return inputValueColumnMap; + } + public String[] getInputValueColumnNames() { + return inputValueColumnNames; + } + + public int[] getOutputKeyColumnNums() { + return outputKeyColumnMap; + } + public int[] getOutputValueColumnNums() { + return outputValueColumnMap; + } + + public boolean getIsBigTableFirst() { + return isBigTableFirst; + } + public int getBigTableResultSize() { + return bigTableResultSize; + } + public int getSmallTableResultSize() { + return smallTableResultSize; + } + } + + public static MapJoinBigTableInfo getBigTableInfo(MapJoinDesc desc) { + + final byte posBigTable = (byte) desc.getPosBigTable(); + + List keyExprs = desc.getKeys().get(posBigTable); + final int keySize = keyExprs.size(); + List bigTableExprs = desc.getExprs().get(posBigTable); + + Byte[] order = desc.getTagOrder(); + Byte posSingleVectorMapJoinSmallTable = (order[0] == posBigTable ? order[1] : order[0]); + + final int outputColumnCount = desc.getOutputColumnNames().size(); + TypeInfo[] outputTypeInfos = new TypeInfo[outputColumnCount]; + + /* + * Gather up big and small table output result information from the MapJoinDesc. + */ + List bigTableRetainList = desc.getRetainList().get(posBigTable); + final int bigTableRetainSize = bigTableRetainList.size(); + + int[] smallTableIndices; + int smallTableIndicesSize; + List smallTableExprs = desc.getExprs().get(posSingleVectorMapJoinSmallTable); + if (desc.getValueIndices() != null && + desc.getValueIndices().get(posSingleVectorMapJoinSmallTable) != null) { + smallTableIndices = desc.getValueIndices().get(posSingleVectorMapJoinSmallTable); + smallTableIndicesSize = smallTableIndices.length; + } else { + smallTableIndices = null; + smallTableIndicesSize = 0; + } + + List smallTableRetainList = desc.getRetainList().get(posSingleVectorMapJoinSmallTable); + final int smallTableRetainSize = + (smallTableRetainList != null ? 
smallTableRetainList.size() : 0); + + int smallTableResultSize = 0; + if (smallTableIndicesSize > 0) { + smallTableResultSize = smallTableIndicesSize; + } else if (smallTableRetainSize > 0) { + smallTableResultSize = smallTableRetainSize; + } + + /* + * Determine the big table retained mapping first so we can optimize out (with + * projection) copying inner join big table keys in the subsequent small table results section. + */ + + List inputKeyColumnNumList = new ArrayList(); + List inputKeyColumnNameList = new ArrayList(); + List inputValueColumnNumList = new ArrayList(); + List inputValueColumnNameList = new ArrayList(); + + int nextOutputColumn = (order[0] == posBigTable ? 0 : smallTableResultSize); + Map columnMap = new HashMap(); + for (int i = 0; i < bigTableRetainSize; i++) { + + ExprNodeColumnDesc bigTableExpr = (ExprNodeColumnDesc) bigTableExprs.get(i); + TypeInfo typeInfo = bigTableExpr.getTypeInfo(); + + outputTypeInfos[nextOutputColumn] = typeInfo; + + columnMap.put(bigTableExpr.getColumn(), i); + nextOutputColumn++; + } + + for (int i = 0; i < keySize; i++) { + ExprNodeColumnDesc keyExpr = (ExprNodeColumnDesc) keyExprs.get(i); + inputKeyColumnNumList.add(columnMap.get(keyExpr.getColumn())); + inputKeyColumnNameList.add(keyExpr.getColumn()); + } + + for (int i = 0; i < bigTableRetainSize; i++) { + if (inputKeyColumnNumList.contains(i)) { + continue; + } + inputValueColumnNumList.add(i); + ExprNodeColumnDesc bigTableExpr = (ExprNodeColumnDesc) bigTableExprs.get(i); + inputValueColumnNameList.add(bigTableExpr.getColumn()); + } + + // UNDONE: + List outputKeyColumnNumList = new ArrayList(); + List outputValueColumnNumList = new ArrayList(); + + // UNDONE + outputKeyColumnNumList.addAll(inputKeyColumnNumList); + outputValueColumnNumList.addAll(inputValueColumnNumList); + + int[] inputKeyColumnNums = + ArrayUtils.toPrimitive(inputKeyColumnNumList.toArray(new Integer[0])); + String[] inputKeyColumnNames = + inputKeyColumnNameList.toArray(new String[0]); + int[] inputValueColumnNums = + ArrayUtils.toPrimitive(inputValueColumnNumList.toArray(new Integer[0])); + String[] inputValueColumnNames = + inputValueColumnNameList.toArray(new String[0]); + + int[] outputKeyColumnNums = + ArrayUtils.toPrimitive(outputKeyColumnNumList.toArray(new Integer[0])); + int[] outputValueColumnNums = + ArrayUtils.toPrimitive(outputValueColumnNumList.toArray(new Integer[0])); + + boolean isBigTableFirst = (order[0] == posBigTable); + + return new MapJoinBigTableInfo( + inputKeyColumnNums, + inputKeyColumnNames, + inputValueColumnNums, + inputValueColumnNames, + outputKeyColumnNums, + outputValueColumnNums, + isBigTableFirst, + bigTableRetainSize, + smallTableResultSize); + } + public static TypeInfo[] getOutputTypeInfos(MapJoinDesc desc) { final byte posBigTable = (byte) desc.getPosBigTable(); @@ -132,7 +350,8 @@ public VectorizationContext getInputVectorizationContext() { int[] smallTableIndices; int smallTableIndicesSize; List smallTableExprs = desc.getExprs().get(posSingleVectorMapJoinSmallTable); - if (desc.getValueIndices() != null && desc.getValueIndices().get(posSingleVectorMapJoinSmallTable) != null) { + if (desc.getValueIndices() != null && + desc.getValueIndices().get(posSingleVectorMapJoinSmallTable) != null) { smallTableIndices = desc.getValueIndices().get(posSingleVectorMapJoinSmallTable); smallTableIndicesSize = smallTableIndices.length; } else { @@ -141,7 +360,8 @@ public VectorizationContext getInputVectorizationContext() { } List smallTableRetainList = 
desc.getRetainList().get(posSingleVectorMapJoinSmallTable); - final int smallTableRetainSize = smallTableRetainList.size(); + final int smallTableRetainSize = + (smallTableRetainList != null ? smallTableRetainList.size() : 0); int smallTableResultSize = 0; if (smallTableIndicesSize > 0) { @@ -216,6 +436,7 @@ public VectorizationContext getInputVectorizationContext() { return outputTypeInfos; } + @Override public void initializeOp(Configuration hconf) throws HiveException { super.initializeOp(hconf); @@ -234,7 +455,6 @@ public void initializeOp(Configuration hconf) throws HiveException { */ @Override protected void internalForward(Object row, ObjectInspector outputOI) throws HiveException { - Object[] values = (Object[]) row; VectorAssignRow va = outputVectorAssignRowMap.get(outputOI); if (va == null) { va = new VectorAssignRow(); @@ -242,7 +462,11 @@ protected void internalForward(Object row, ObjectInspector outputOI) throws Hive outputVectorAssignRowMap.put(outputOI, va); } - va.assignRow(outputBatch, outputBatch.size, values); + if (row instanceof ArrayList) { + va.assignRow(outputBatch, outputBatch.size, (ArrayList) row); + } else { + va.assignRow(outputBatch, outputBatch.size, (Object[]) row); + } ++outputBatch.size; if (outputBatch.size == VectorizedRowBatch.DEFAULT_SIZE) { @@ -251,7 +475,7 @@ protected void internalForward(Object row, ObjectInspector outputOI) throws Hive } private void flushOutput() throws HiveException { - forward(outputBatch, null, true); + vectorForward(outputBatch); outputBatch.reset(); } @@ -263,8 +487,10 @@ public void closeOp(boolean aborted) throws HiveException { tableContainer.dumpMetrics(); } } - if (!aborted && 0 < outputBatch.size) { - flushOutput(); + if (!aborted) { + if (outputBatch.size > 0) { + flushOutput(); + } } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java index a84bd72..95a966c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorMapJoinOperator.java @@ -19,8 +19,13 @@ package org.apache.hadoop.hive.ql.exec.vector; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import java.util.Map; +import java.util.Set; +import java.util.TreeSet; + +import org.apache.commons.lang.ArrayUtils; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.ql.CompilationOpContext; @@ -28,6 +33,8 @@ import org.apache.hadoop.hive.ql.exec.JoinUtil; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; +import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinBaseOperator.MapJoinBigTableInfo; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriterFactory; @@ -76,6 +83,12 @@ private VectorExpressionWriter[] rowWriters; // Writer for producing row from input batch protected transient Object[] singleRow; + private transient VectorCopyRow auxiliaryVectorCopy; + + private transient VectorizedRowBatch auxiliaryOutputBatch; + + private transient int[] auxiliaryNullColumnNums; + /** Kryo ctor. 
*/ @VisibleForTesting public VectorMapJoinOperator() { @@ -181,11 +194,6 @@ protected Object _evaluate(Object row, int version) throws HiveException { } // Now replace the old evaluators with our own joinValues[posBigTable] = vectorNodeEvaluators; - - // Filtering is handled in the input batch processing - if (filterMaps != null) { - filterMaps[posBigTable] = null; - } } @Override @@ -195,6 +203,81 @@ protected Object _evaluate(Object row, int version) throws HiveException { } @Override + protected JoinUtil.JoinResult setMapJoinKeyNoNulls(ReusableGetAdaptor dest, Object row, byte alias, + MatchTracker matchTracker) + throws HiveException { + return dest.setFromVectorNoNulls(keyValues[batchIndex], keyOutputWriters, keyWrapperBatch, + matchTracker); + } + + @Override + protected void setMapJoinKeyNoResult( + ReusableGetAdaptor dest, Object row, byte alias, MatchTracker matchTracker) + throws HiveException { + dest.setFromVectorNoResult(keyValues[batchIndex], keyOutputWriters, keyWrapperBatch, + matchTracker); + } + + private int[] getNonBigKeyNullColumnNums(MapJoinBigTableInfo mapJoinBigTableInfo) { + Set auxiliaryNullColumnSet = new TreeSet(); + for (Integer i : mapJoinBigTableInfo.getInputValueColumnMap()) { + auxiliaryNullColumnSet.add(i); + } + int smallTableColumnNum = + (mapJoinBigTableInfo.getIsBigTableFirst() ? + mapJoinBigTableInfo.getBigTableResultSize() : 0); + for (int i = 0; i < mapJoinBigTableInfo.getSmallTableResultSize(); i++) { + // Offset the Small Table result columns past the Big Table result area. + auxiliaryNullColumnSet.add(smallTableColumnNum + i); + } + ArrayList auxiliaryNullColumnList = new ArrayList(); + auxiliaryNullColumnList.addAll(auxiliaryNullColumnSet); + return ArrayUtils.toPrimitive(auxiliaryNullColumnList.toArray(new Integer[0])); + } + + @Override + protected void forwardFirstTimeMatchToFullOuterIntersect( + Object firstTimeMatchRow, ObjectInspector outputOI) + throws HiveException { + + if (auxiliaryVectorCopy == null) { + + MapJoinBigTableInfo mapJoinBigTableInfo = getBigTableInfo(conf); + int[] inputKeyColumnMap = mapJoinBigTableInfo.getInputKeyColumnMap(); + + auxiliaryVectorCopy = new VectorCopyRow(); + auxiliaryVectorCopy.init( + inputKeyColumnMap, + mapJoinBigTableInfo.getOutputKeyColumnNums(), + Arrays.copyOf(vOutContext.getInitialTypeInfos(), inputKeyColumnMap.length)); + + auxiliaryOutputBatch = VectorizedBatchUtil.makeLike(outputBatch); + + auxiliaryNullColumnNums = + getNonBigKeyNullColumnNums(mapJoinBigTableInfo); + } + + VectorizedRowBatch inBatch = (VectorizedRowBatch) firstTimeMatchRow; + auxiliaryVectorCopy.copyByValue( + inBatch, batchIndex, + auxiliaryOutputBatch, auxiliaryOutputBatch.size); + for (int columnNum : auxiliaryNullColumnNums) { + ColumnVector colVector = auxiliaryOutputBatch.cols[columnNum]; + colVector.isNull[auxiliaryOutputBatch.size] = true; + colVector.noNulls = false; + } + + ++auxiliaryOutputBatch.size; + if (auxiliaryOutputBatch.size == VectorizedRowBatch.DEFAULT_SIZE) { + flushAuxiliaryOutput(); + } + } + + private void flushAuxiliaryOutput() throws HiveException { + vectorForwardAuxiliary(auxiliaryOutputBatch); + auxiliaryOutputBatch.reset(); + } + + @Override + public void process(Object row, int tag) throws HiveException { VectorizedRowBatch inBatch = (VectorizedRowBatch) row; @@ -240,6 +323,16 @@ public void process(Object row, int tag) throws HiveException { } @Override + public void closeOp(boolean aborted) throws HiveException { + super.closeOp(aborted); + if (!aborted) { + if (auxiliaryOutputBatch != null && auxiliaryOutputBatch.size > 0) { + flushAuxiliaryOutput(); + } + } + } + + @Override + protected
void spillBigTableRow(MapJoinTableContainer hybridHtContainer, Object row) throws HiveException { // Extract the actual row from row batch diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java index 35f810f..a88d2c6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSMBMapJoinOperator.java @@ -324,7 +324,7 @@ protected void internalForward(Object row, ObjectInspector outputOI) throws Hive } private void flushOutput() throws HiveException { - forward(outputBatch, null, true); + vectorForward(outputBatch); outputBatch.reset(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java index 22d2f34..2f296c9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorSelectOperator.java @@ -136,7 +136,7 @@ public void process(Object row, int tag) throws HiveException { // Just forward the row as is if (conf.isSelStarNoCompute()) { - forward(row, inputObjInspectors[tag], true); + vectorForward((VectorizedRowBatch) row); return; } @@ -155,7 +155,7 @@ public void process(Object row, int tag) throws HiveException { int originalProjectionSize = vrg.projectionSize; vrg.projectionSize = projectedOutputColumns.length; vrg.projectedColumns = this.projectedOutputColumns; - forward(vrg, outputObjInspector, true); + vectorForward((VectorizedRowBatch) row); // Revert the projected columns back, because vrg will be re-used. vrg.projectionSize = originalProjectionSize; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index 93212ce..7086317 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -385,9 +385,6 @@ public DataTypePhysicalVariation getDataTypePhysicalVariation(int columnNum) thr if (initialDataTypePhysicalVariations == null) { return null; } - if (columnNum < 0) { - fake++; - } if (columnNum < initialDataTypePhysicalVariations.size()) { return initialDataTypePhysicalVariations.get(columnNum); } @@ -1648,8 +1645,6 @@ private VectorExpression getDecimal64VectorExpressionForUdf(GenericUDF genericUd return vectorExpression; } - static int fake = 0; - private VectorExpression getVectorExpressionForUdf(GenericUDF genericUdf, Class udfClass, List childExpr, VectorExpressionDescriptor.Mode mode, TypeInfo returnType) throws HiveException { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java index 4407961..2ae2609 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/expressions/VectorExpression.java @@ -285,21 +285,10 @@ protected String getDoubleValueParamString(int typeNum, double value) { } protected String getParamTypeString(int typeNum) { - if (inputTypeInfos == null || inputDataTypePhysicalVariations == null) { - fake++; - } - if (typeNum >= inputTypeInfos.length || typeNum >= inputDataTypePhysicalVariations.length) { - fake++; - } return 
getTypeName(inputTypeInfos[typeNum], inputDataTypePhysicalVariations[typeNum]); } - static int fake; - public static String getTypeName(TypeInfo typeInfo, DataTypePhysicalVariation dataTypePhysicalVariation) { - if (typeInfo == null) { - fake++; - } if (dataTypePhysicalVariation != null && dataTypePhysicalVariation != DataTypePhysicalVariation.NONE) { return typeInfo.toString() + "/" + dataTypePhysicalVariation; } else { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java index c832cdb..e82df78 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinCommonOperator.java @@ -21,7 +21,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; -import java.util.Map; +import java.util.Map.Entry; import org.apache.commons.lang.ArrayUtils; import org.slf4j.Logger; @@ -61,11 +61,17 @@ import org.apache.hadoop.hive.ql.plan.VectorDesc; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation; import org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo; import org.apache.hadoop.hive.ql.plan.api.OperatorType; +import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; import org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinaryDeserializeRead; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; @@ -124,6 +130,10 @@ protected void initLoggingPrefix(String className) { // a mixture of input big table columns and new scratch columns. protected VectorizationContext vOutContext; + protected VectorMapJoinVariation vectorMapJoinVariation; + protected HashTableKind hashTableKind; + protected HashTableKeyType hashTableKeyType; + // The output column projection of the vectorized row batch. And, the type infos of the output // columns. protected int[] outputProjection; @@ -149,28 +159,70 @@ protected void initLoggingPrefix(String className) { protected String[] bigTableValueColumnNames; protected TypeInfo[] bigTableValueTypeInfos; - // This is a mapping of which big table columns (input and key/value expressions) will be - // part of the big table portion of the join output result. - protected VectorColumnOutputMapping bigTableRetainedMapping; + /* + * NOTE: + * The Big Table key columns are from the key expressions. + * The Big Table value columns are from the getExpr(posBigTable) expressions. + * Any calculations needed for those will be scratch columns. + * + * The Small Table key and value output columns are scratch columns. + * + * Big Table Retain Column Map / TypeInfos: + * Any Big Table Batch columns that will be in the output result. 
+ * 0, 1, or more Column Nums and TypeInfos + * + * Non Outer Small Table Key Mapping: + * For non-[FULL] OUTER MapJoin, when Big Table key columns are not retained for the output + * result but are needed for the Small Table output result, they are put in this mapping + * as they are required for copying rows to the overflow batch. + * + * Outer Small Table Key Mapping: + * For [FULL] OUTER MapJoin, the mapping for any Small Table key columns needed for the + * output result from the Big Table key columns. The Big Table keys cannot be projected since + * on NOMATCH there must be a physical column present to hold the non-match NULL. + * + * Full Outer Small Table Key Mapping: + * For FULL OUTER MapJoin, the mapping from any needed Small Table key columns to their area + * in the output result. + * + * For deserializing a FULL OUTER non-match Small Table key into the output result. + * Can be partial or empty if some or all Small Table key columns are not retained. + * + * Small Table Value Mapping: + * The mapping from Small Table value columns to their area in the output result. + * + * For deserializing Small Table values into the output result. + * + * It is the Small Table value index to output column numbers and TypeInfos. + * That is, a mapping of the LazyBinary field order to output batch scratch columns for the + * small table portion. + * Or, to use the output column nums for OUTER Small Table value NULLs. + * + */ + protected int[] bigTableRetainColumnMap; + protected TypeInfo[] bigTableRetainTypeInfos; + + protected int[] nonOuterSmallTableKeyColumnMap; + protected TypeInfo[] nonOuterSmallTableKeyTypeInfos; + + protected VectorColumnOutputMapping outerSmallTableKeyMapping; - // This is a mapping of which keys will be copied from the big table (input and key expressions) - // to the small table result portion of the output for outer join. - protected VectorColumnOutputMapping bigTableOuterKeyMapping; + protected VectorColumnSourceMapping fullOuterSmallTableKeyMapping; - // This is a mapping of the values in the small table hash table that will be copied to the - // small table result portion of the output. That is, a mapping of the LazyBinary field order - // to output batch scratch columns for the small table portion. - protected VectorColumnSourceMapping smallTableMapping; + protected VectorColumnSourceMapping smallTableValueMapping; + // The MapJoin output result projection for both the Big Table input batch and the overflow batch. protected VectorColumnSourceMapping projectionMapping; // These are the output columns for the small table and the outer small table keys. - protected int[] smallTableOutputVectorColumns; - protected int[] bigTableOuterKeyOutputVectorColumns; + protected int[] outerSmallTableKeyColumnMap; + protected int[] smallTableValueColumnMap; // These are the columns in the big and small table that are ByteColumnVector columns. // We create data buffers for these columns so we can copy strings into those columns by value. protected int[] bigTableByteColumnVectorColumns; + protected int[] nonOuterSmallTableKeyByteColumnVectorColumns; + protected int[] outerSmallTableKeyByteColumnVectorColumns; protected int[] smallTableByteColumnVectorColumns; // The above members are initialized by the constructor and must not be @@ -186,13 +238,22 @@ protected void initLoggingPrefix(String className) { // portion of the join output.
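To make the mapping regions described above concrete, here is a minimal, self-contained sketch of the idea behind these int[]-to-int[] mappings. The class, helper, column numbers, and row layout are illustrative only and are not taken from this patch or the Hive API: each mapping pairs source column numbers with output column numbers, and a copy step moves values accordingly, e.g. Big Table keys into the Small Table key area for the OUTER case.

// Illustrative only: a column mapping copies sourceRow[inputColumnNums[i]]
// into outputRow[outputColumnNums[i]].
public class ColumnMappingSketch {

  static void copyByMapping(long[] sourceRow, long[] outputRow,
      int[] inputColumnNums, int[] outputColumnNums) {
    for (int i = 0; i < inputColumnNums.length; i++) {
      outputRow[outputColumnNums[i]] = sourceRow[inputColumnNums[i]];
    }
  }

  public static void main(String[] args) {
    // Hypothetical output layout: columns 0-1 = Big Table retain area,
    // column 2 = Small Table key area, column 3 = Small Table value area.
    long[] bigTableRow = { 10L, 20L, 30L };   // column 1 is the join key
    long[] outputRow = new long[4];

    // Big Table retain mapping: input columns 0,1 -> output columns 0,1.
    copyByMapping(bigTableRow, outputRow, new int[] { 0, 1 }, new int[] { 0, 1 });

    // Outer Small Table key mapping: Big Table key column 1 -> output column 2,
    // a physical copy so a NOMATCH row can later overwrite it with NULL.
    copyByMapping(bigTableRow, outputRow, new int[] { 1 }, new int[] { 2 });

    System.out.println(java.util.Arrays.toString(outputRow)); // [10, 20, 20, 0]
  }
}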
protected transient VectorCopyRow bigTableRetainedVectorCopy; + // This helper object deserializes BinarySortable format small table keys into columns of a row + // in a vectorized row batch. + protected int[] allSmallTableKeyColumnNums; + protected boolean[] allSmallTableKeyColumnIncluded; + protected transient VectorDeserializeRow smallTableKeyOuterVectorDeserializeRow; + + protected transient VectorCopyRow nonOuterSmallTableKeyVectorCopy; + + // UNDONE // A helper object that efficiently copies the big table key columns (input or key expressions) - // that appear in the small table portion of the join output for outer joins. - protected transient VectorCopyRow bigTableVectorCopyOuterKeys; + // that appear in the small table portion of the join output. + protected transient VectorCopyRow outerSmallTableKeyVectorCopy; // This helper object deserializes LazyBinary format small table values into columns of a row // in a vectorized row batch. - protected transient VectorDeserializeRow smallTableVectorDeserializeRow; + protected transient VectorDeserializeRow smallTableValueVectorDeserializeRow; // This a 2nd batch with the same "column schema" as the big table batch that can be used to // build join output results in. If we can create some join output results in the big table @@ -207,6 +268,9 @@ protected void initLoggingPrefix(String className) { // Whether the native vectorized map join operator has performed its common setup. protected transient boolean needCommonSetup; + // Whether the native vectorized map join operator has performed its first batch setup. + protected transient boolean needFirstBatchSetup; + // Whether the native vectorized map join operator has performed its // native vector map join hash table setup. protected transient boolean needHashTableSetup; @@ -214,6 +278,9 @@ protected void initLoggingPrefix(String className) { // The small table hash table for the native vectorized map join operator. protected transient VectorMapJoinHashTable vectorMapJoinHashTable; + protected transient long batchCounter; + protected transient long rowCounter; + /** Kryo ctor. */ protected VectorMapJoinCommonOperator() { super(); @@ -246,9 +313,9 @@ public VectorMapJoinCommonOperator(CompilationOpContext ctx, OperatorDesc conf, posSingleVectorMapJoinSmallTable = (order[0] == posBigTable ? 
order[1] : order[0]); isOuterJoin = !desc.getNoOuterJoin(); - Map> filterExpressions = desc.getFilters(); - bigTableFilterExpressions = vContext.getVectorExpressions(filterExpressions.get(posBigTable), - VectorExpressionDescriptor.Mode.FILTER); + vectorMapJoinVariation = this.vectorDesc.getVectorMapJoinVariation(); + hashTableKind = this.vectorDesc.getHashTableKind(); + hashTableKeyType = this.vectorDesc.getHashTableKeyType(); bigTableKeyColumnMap = vectorMapJoinInfo.getBigTableKeyColumnMap(); bigTableKeyColumnNames = vectorMapJoinInfo.getBigTableKeyColumnNames(); @@ -260,11 +327,19 @@ public VectorMapJoinCommonOperator(CompilationOpContext ctx, OperatorDesc conf, bigTableValueTypeInfos = vectorMapJoinInfo.getBigTableValueTypeInfos(); bigTableValueExpressions = vectorMapJoinInfo.getSlimmedBigTableValueExpressions(); - bigTableRetainedMapping = vectorMapJoinInfo.getBigTableRetainedMapping(); + bigTableFilterExpressions = vectorMapJoinInfo.getBigTableFilterExpressions(); + + bigTableRetainColumnMap = vectorMapJoinInfo.getBigTableRetainColumnMap(); + bigTableRetainTypeInfos = vectorMapJoinInfo.getBigTableRetainTypeInfos(); + + nonOuterSmallTableKeyColumnMap = vectorMapJoinInfo.getNonOuterSmallTableKeyColumnMap(); + nonOuterSmallTableKeyTypeInfos = vectorMapJoinInfo.getNonOuterSmallTableKeyTypeInfos(); - bigTableOuterKeyMapping = vectorMapJoinInfo.getBigTableOuterKeyMapping(); + outerSmallTableKeyMapping = vectorMapJoinInfo.getOuterSmallTableKeyMapping(); - smallTableMapping = vectorMapJoinInfo.getSmallTableMapping(); + fullOuterSmallTableKeyMapping = vectorMapJoinInfo.getFullOuterSmallTableKeyMapping(); + + smallTableValueMapping = vectorMapJoinInfo.getSmallTableValueMapping(); projectionMapping = vectorMapJoinInfo.getProjectionMapping(); @@ -273,47 +348,73 @@ public VectorMapJoinCommonOperator(CompilationOpContext ctx, OperatorDesc conf, protected void determineCommonInfo(boolean isOuter) throws HiveException { - bigTableOuterKeyOutputVectorColumns = bigTableOuterKeyMapping.getOutputColumns(); - smallTableOutputVectorColumns = smallTableMapping.getOutputColumns(); + outerSmallTableKeyColumnMap = outerSmallTableKeyMapping.getOutputColumns(); + + smallTableValueColumnMap = smallTableValueMapping.getOutputColumns(); // Which big table and small table columns are ByteColumnVector and need have their data buffer // to be manually reset for some join result processing? 
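The scan that answers this question is straightforward; the following standalone sketch shows the idea. The class, helper name, and bare type names are illustrative and only mirror (do not copy) the getByteColumnVectorColumns refactor that appears below; Hive's real check also has to handle parameterized names such as char(n).

import java.util.ArrayList;
import java.util.List;

public class ByteColumnScanSketch {

  // Collect the output columns whose type is string-family and therefore
  // backed by a bytes-based column vector (illustrative type names).
  static int[] byteColumnVectorColumns(int[] outputColumns, String[] typeNames) {
    List<Integer> list = new ArrayList<>();
    for (int i = 0; i < outputColumns.length; i++) {
      switch (typeNames[i]) {
      case "string":
      case "char":
      case "varchar":
      case "binary":
        list.add(outputColumns[i]);
        break;
      default:
        break;
      }
    }
    return list.stream().mapToInt(Integer::intValue).toArray();
  }

  public static void main(String[] args) {
    int[] cols = byteColumnVectorColumns(
        new int[] { 4, 5, 6 }, new String[] { "bigint", "string", "varchar" });
    System.out.println(java.util.Arrays.toString(cols)); // [5, 6]
  }
}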
- bigTableByteColumnVectorColumns = getByteColumnVectorColumns(bigTableOuterKeyMapping); + bigTableByteColumnVectorColumns = + getByteColumnVectorColumns(bigTableRetainColumnMap, bigTableRetainTypeInfos); + + nonOuterSmallTableKeyByteColumnVectorColumns = + getByteColumnVectorColumns(nonOuterSmallTableKeyColumnMap, nonOuterSmallTableKeyTypeInfos); - smallTableByteColumnVectorColumns = getByteColumnVectorColumns(smallTableMapping); + outerSmallTableKeyByteColumnVectorColumns = + getByteColumnVectorColumns(outerSmallTableKeyMapping); + + smallTableByteColumnVectorColumns = + getByteColumnVectorColumns(smallTableValueMapping); outputProjection = projectionMapping.getOutputColumns(); outputTypeInfos = projectionMapping.getTypeInfos(); - if (LOG.isDebugEnabled()) { + if (LOG.isInfoEnabled()) { int[] orderDisplayable = new int[order.length]; for (int i = 0; i < order.length; i++) { orderDisplayable[i] = (int) order[i]; } - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor order " + Arrays.toString(orderDisplayable)); - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor posBigTable " + (int) posBigTable); - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor posSingleVectorMapJoinSmallTable " + (int) posSingleVectorMapJoinSmallTable); + LOG.info(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor order " + Arrays.toString(orderDisplayable)); + LOG.info(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor posBigTable " + (int) posBigTable); + LOG.info(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor posSingleVectorMapJoinSmallTable " + (int) posSingleVectorMapJoinSmallTable); + + LOG.info(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableKeyColumnMap " + Arrays.toString(bigTableKeyColumnMap)); + LOG.info(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableKeyColumnNames " + Arrays.toString(bigTableKeyColumnNames)); + LOG.info(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableKeyTypeInfos " + Arrays.toString(bigTableKeyTypeInfos)); - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableKeyColumnMap " + Arrays.toString(bigTableKeyColumnMap)); - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableKeyColumnNames " + Arrays.toString(bigTableKeyColumnNames)); - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableKeyTypeInfos " + Arrays.toString(bigTableKeyTypeInfos)); + LOG.info(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableValueColumnMap " + Arrays.toString(bigTableValueColumnMap)); + LOG.info(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableValueColumnNames " + Arrays.toString(bigTableValueColumnNames)); + LOG.info(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableValueTypeNames " + Arrays.toString(bigTableValueTypeInfos)); - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableValueColumnMap " + Arrays.toString(bigTableValueColumnMap)); - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableValueColumnNames " + Arrays.toString(bigTableValueColumnNames)); - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableValueTypeNames " + Arrays.toString(bigTableValueTypeInfos)); + LOG.info(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor getBigTableRetainColumnMap " + 
Arrays.toString(bigTableRetainColumnMap)); + LOG.info(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableRetainTypeInfos " + Arrays.toString(bigTableRetainTypeInfos)); - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableRetainedMapping " + bigTableRetainedMapping.toString()); + LOG.info(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor nonOuterSmallTableKeyColumnMap " + Arrays.toString(nonOuterSmallTableKeyColumnMap)); + LOG.info(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor nonOuterSmallTableKeyTypeInfos " + Arrays.toString(nonOuterSmallTableKeyTypeInfos)); - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableOuterKeyMapping " + bigTableOuterKeyMapping.toString()); + LOG.info(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor outerSmallTableKeyMapping " + outerSmallTableKeyMapping.toString()); - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor smallTableMapping " + smallTableMapping.toString()); + LOG.info(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor fullOuterSmallTableKeyMapping " + fullOuterSmallTableKeyMapping.toString()); - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableByteColumnVectorColumns " + Arrays.toString(bigTableByteColumnVectorColumns)); - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor smallTableByteColumnVectorColumns " + Arrays.toString(smallTableByteColumnVectorColumns)); + LOG.info(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor smallTableValueMapping " + smallTableValueMapping.toString()); + + LOG.info(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor bigTableByteColumnVectorColumns " + Arrays.toString(bigTableByteColumnVectorColumns)); + LOG.info(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor smallTableByteColumnVectorColumns " + Arrays.toString(smallTableByteColumnVectorColumns)); + + LOG.info(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor outputProjection " + Arrays.toString(outputProjection)); + LOG.info(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor outputTypeInfos " + Arrays.toString(outputTypeInfos)); + + LOG.info(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor mapJoinDesc.getKeysString " + conf.getKeysString()); + if (conf.getValueIndices() != null) { + for (Entry entry : conf.getValueIndices().entrySet()) { + LOG.info(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor mapJoinDesc.getValueIndices +" + + (int) entry.getKey() + " " + Arrays.toString(entry.getValue())); + } + } + LOG.info(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor mapJoinDesc.getExprs " + conf.getExprs().toString()); + LOG.info(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor mapJoinDesc.getRetainList " + conf.getRetainList().toString()); - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor outputProjection " + Arrays.toString(outputProjection)); - LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator constructor outputTypeInfos " + Arrays.toString(outputTypeInfos)); } setupVOutContext(conf.getOutputColumnNames()); @@ -323,11 +424,14 @@ protected void determineCommonInfo(boolean isOuter) throws HiveException { * Determine from a mapping which columns are BytesColumnVector columns. 
*/ private int[] getByteColumnVectorColumns(VectorColumnMapping mapping) { + return getByteColumnVectorColumns(mapping.getOutputColumns(), mapping.getTypeInfos()); + } + + private int[] getByteColumnVectorColumns(int[] outputColumns, TypeInfo[] typeInfos) { + + // Search mapping for any strings and return their output columns. ArrayList list = new ArrayList(); - int count = mapping.getCount(); - int[] outputColumns = mapping.getOutputColumns(); - TypeInfo[] typeInfos = mapping.getTypeInfos(); + final int count = outputColumns.length; for (int i = 0; i < count; i++) { int outputColumn = outputColumns[i]; String typeName = typeInfos[i].getTypeName(); @@ -386,9 +490,57 @@ protected HashTableLoader getHashTableLoader(Configuration hconf) { return hashTableLoader; } + private void initializeFullOuterObjects() throws HiveException { + + // The Small Table key type info is the same as Big Table's. + TypeInfo[] smallTableKeyTypeInfos = bigTableKeyTypeInfos; + final int allKeysSize = smallTableKeyTypeInfos.length; + + /* + * The VectorMapJoinFullOuter[Intersect]{Long|MultiKey|String}Operator outputs 0, 1, or more + * Small Key columns in the join result. + */ + allSmallTableKeyColumnNums = new int[allKeysSize]; + Arrays.fill(allSmallTableKeyColumnNums, -1); + allSmallTableKeyColumnIncluded = new boolean[allKeysSize]; + + final int outputKeysSize = fullOuterSmallTableKeyMapping.getCount(); + int[] outputKeyNums = fullOuterSmallTableKeyMapping.getInputColumns(); + int[] outputKeyOutputColumns = fullOuterSmallTableKeyMapping.getOutputColumns(); + for (int i = 0; i < outputKeysSize; i++) { + final int outputKeyNum = outputKeyNums[i]; + allSmallTableKeyColumnNums[outputKeyNum] = outputKeyOutputColumns[i]; + allSmallTableKeyColumnIncluded[outputKeyNum] = true; + } + + if (hashTableKeyType == HashTableKeyType.MULTI_KEY && + outputKeysSize > 0) { + + // UNDONE: boolean[] columnSortOrderIsDesc, byte[] columnNullMarker, byte[] columnNotNullMarker + smallTableKeyOuterVectorDeserializeRow = + new VectorDeserializeRow( + new BinarySortableDeserializeRead( + smallTableKeyTypeInfos, + /* useExternalBuffer */ true)); + smallTableKeyOuterVectorDeserializeRow.init( + allSmallTableKeyColumnNums, allSmallTableKeyColumnIncluded); + } + } + @Override protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); + + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER && + !conf.isDynamicPartitionHashJoin() && + !conf.isFullOuterIntersect()) { + + // The auxiliary forward sends first-time match keys to the FULL OUTER INTERSECT MapJoin + // operator. + auxiliaryChildIndex = 1; + } + VectorExpression.doTransientInit(bigTableFilterExpressions); VectorExpression.doTransientInit(bigTableKeyExpressions); VectorExpression.doTransientInit(bigTableValueExpressions); @@ -405,23 +557,34 @@ protected void initializeOp(Configuration hconf) throws HiveException { /* * Create our vectorized copy row and deserialize row helper objects.
*/ - if (smallTableMapping.getCount() > 0) { - smallTableVectorDeserializeRow = + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER) { + initializeFullOuterObjects(); + } + + if (smallTableValueMapping.getCount() > 0) { + smallTableValueVectorDeserializeRow = new VectorDeserializeRow( new LazyBinaryDeserializeRead( - smallTableMapping.getTypeInfos(), + smallTableValueMapping.getTypeInfos(), /* useExternalBuffer */ true)); - smallTableVectorDeserializeRow.init(smallTableMapping.getOutputColumns()); + smallTableValueVectorDeserializeRow.init(smallTableValueMapping.getOutputColumns()); } - if (bigTableRetainedMapping.getCount() > 0) { + if (bigTableRetainColumnMap.length > 0) { bigTableRetainedVectorCopy = new VectorCopyRow(); - bigTableRetainedVectorCopy.init(bigTableRetainedMapping); + bigTableRetainedVectorCopy.init( + bigTableRetainColumnMap, bigTableRetainTypeInfos); } - if (bigTableOuterKeyMapping.getCount() > 0) { - bigTableVectorCopyOuterKeys = new VectorCopyRow(); - bigTableVectorCopyOuterKeys.init(bigTableOuterKeyMapping); + if (nonOuterSmallTableKeyColumnMap.length > 0) { + nonOuterSmallTableKeyVectorCopy = new VectorCopyRow(); + nonOuterSmallTableKeyVectorCopy.init( + nonOuterSmallTableKeyColumnMap, nonOuterSmallTableKeyTypeInfos); + } + + if (outerSmallTableKeyMapping.getCount() > 0) { + outerSmallTableKeyVectorCopy = new VectorCopyRow(); + outerSmallTableKeyVectorCopy.init(outerSmallTableKeyMapping); } /* @@ -430,6 +593,7 @@ protected void initializeOp(Configuration hconf) throws HiveException { overflowBatch = setupOverflowBatch(); needCommonSetup = true; + needFirstBatchSetup = true; needHashTableSetup = true; if (LOG.isDebugEnabled()) { @@ -555,27 +719,44 @@ private void allocateOverflowBatchColumnVector(VectorizedRowBatch overflowBatch, /* * Common one time setup by native vectorized map join operator's processOp. */ - protected void commonSetup(VectorizedRowBatch batch) throws HiveException { + protected void commonSetup() throws HiveException { - if (LOG.isDebugEnabled()) { - LOG.debug("VectorMapJoinInnerCommonOperator commonSetup begin..."); - displayBatchColumns(batch, "batch"); - displayBatchColumns(overflowBatch, "overflowBatch"); + /* + * Make sure big table BytesColumnVectors have room for string values in the overflow batch... + */ + for (int column: bigTableByteColumnVectorColumns) { + BytesColumnVector bytesColumnVector = (BytesColumnVector) overflowBatch.cols[column]; + bytesColumnVector.initBuffer(); } - // Make sure big table BytesColumnVectors have room for string values in the overflow batch... - for (int column: bigTableByteColumnVectorColumns) { + for (int column : nonOuterSmallTableKeyByteColumnVectorColumns) { BytesColumnVector bytesColumnVector = (BytesColumnVector) overflowBatch.cols[column]; bytesColumnVector.initBuffer(); } + for (int column : outerSmallTableKeyByteColumnVectorColumns) { + BytesColumnVector bytesColumnVector = (BytesColumnVector) overflowBatch.cols[column]; + bytesColumnVector.initBuffer(); + } + + for (int column: smallTableByteColumnVectorColumns) { + BytesColumnVector bytesColumnVector = (BytesColumnVector) overflowBatch.cols[column]; + bytesColumnVector.initBuffer(); + } + + batchCounter = 0; + rowCounter = 0; + } + + /* + * Common one time setup by native vectorized map join operator's first batch. + */ + public void firstBatchSetup(VectorizedRowBatch batch) throws HiveException { // Make sure small table BytesColumnVectors have room for string values in the big table and // overflow batchs... 
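The initBuffer() calls above exist because a BytesColumnVector entry can either reference caller-owned byte arrays or own a copy inside the vector's internal data buffer; copying by value requires that buffer to be allocated first. A brief standalone sketch of the two assignment paths, using the storage-api class directly (the sample values are made up):

import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;

public class ByteColumnBufferSketch {
  public static void main(String[] args) {
    BytesColumnVector col = new BytesColumnVector(1024);

    byte[] key = "abc".getBytes();

    // By reference: the column aliases caller memory. Unsafe if the batch
    // outlives the source bytes (e.g. rows parked in an overflow batch).
    col.setRef(0, key, 0, key.length);

    // By value: requires the internal data buffer, then copies the bytes in.
    col.initBuffer();
    col.setVal(1, key, 0, key.length);
  }
}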
for (int column: smallTableByteColumnVectorColumns) { BytesColumnVector bytesColumnVector = (BytesColumnVector) batch.cols[column]; bytesColumnVector.initBuffer(); - bytesColumnVector = (BytesColumnVector) overflowBatch.cols[column]; - bytesColumnVector.initBuffer(); } // Setup a scratch batch that will be used to play back big table rows that were spilled @@ -583,6 +764,54 @@ protected void commonSetup(VectorizedRowBatch batch) throws HiveException { spillReplayBatch = VectorizedBatchUtil.makeLike(batch); } + public void hashTableSetup() throws HiveException { + } + + public abstract void processBatch(VectorizedRowBatch batch) throws HiveException; + + @Override + public void process(Object row, int tag) throws HiveException { + + VectorizedRowBatch batch = (VectorizedRowBatch) row; + alias = (byte) tag; + + if (needCommonSetup) { + + // Our one time process method initialization. + commonSetup(); + + needCommonSetup = false; + } + + if (needFirstBatchSetup) { + + // Our one time first-batch method initialization. + firstBatchSetup(batch); + + needFirstBatchSetup = false; + } + + if (needHashTableSetup) { + + // Set up our hash table specialization. This happens the first time the process + // method is called, and again after a Hybrid Grace reload. + + hashTableSetup(); + + needHashTableSetup = false; + } + + batchCounter++; + + if (batch.size == 0) { + return; + } + + rowCounter += batch.size; + + processBatch(batch); + } + protected void displayBatchColumns(VectorizedRowBatch batch, String batchName) { LOG.debug(getLoggingPrefix() + " VectorMapJoinCommonOperator commonSetup " + batchName + " column count " + batch.numCols); for (int column = 0; column < batch.numCols; column++) { diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterIntersectLongOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterIntersectLongOperator.java new file mode 100644 index 0000000..97d6d68 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterIntersectLongOperator.java @@ -0,0 +1,119 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin; + +import java.io.IOException; + +// import org.slf4j.Logger; +// import org.slf4j.LoggerFactory; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; +// Single-Column Long specific imports.
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; + +/* + * Specialized class for doing the FULL OUTER MapJoin intersect phase (Small Table key match + * tracking) on a Single-Column Long using a hash map. + */ +public class VectorMapJoinFullOuterIntersectLongOperator extends VectorMapJoinOuterLongOperator { + private static final long serialVersionUID = 1L; + + //------------------------------------------------------------------------------------------------ + + private static final String CLASS_NAME = VectorMapJoinFullOuterIntersectLongOperator.class.getName(); + // private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + /** Kryo ctor. */ + protected VectorMapJoinFullOuterIntersectLongOperator() { + super(); + } + + public VectorMapJoinFullOuterIntersectLongOperator(CompilationOpContext ctx) { + super(ctx); + } + + public VectorMapJoinFullOuterIntersectLongOperator(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); + + fullOuterIntersectHashTableSetup(); + } + + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { + + try { + + /* + * Only do key match tracking here -- do not generate any results. + * + * On closeOp, generateFullOuterSmallTableNoMatches will generate the no-match Small Table + * results. + */ + + /* + * Single-Column Long specific declarations. + */ + + // The one join column for this specialized class. + LongColumnVector joinColVector = (LongColumnVector) batch.cols[singleJoinColumn]; + long[] vector = joinColVector.vector; + + if (batch.selectedInUse) { + + int[] selected = batch.selected; + final int logicalSize = batch.size; + for (int logicalIndex = 0; logicalIndex < logicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; + + hashMap.lookupNoResult(vector[batchIndex], fullOuterIntersectReadPos, matchTracker); + } + } else { + + final int size = batch.size; + for (int batchIndex = 0; batchIndex < size; batchIndex++) { + + hashMap.lookupNoResult(vector[batchIndex], fullOuterIntersectReadPos, matchTracker); + } + } + + } catch (IOException e) { + throw new HiveException(e); + } catch (Exception e) { + throw new HiveException(e); + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterIntersectMultiKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterIntersectMultiKeyOperator.java new file mode 100644 index 0000000..02a4c18 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterIntersectMultiKeyOperator.java @@ -0,0 +1,128 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin; + +// import org.slf4j.Logger; +// import org.slf4j.LoggerFactory; +import java.io.IOException; + +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; + +/* + * Specialized class for doing the FULL OUTER MapJoin intersect phase (Small Table key match + * tracking) on Multi-Key using a hash map. + */ +public class VectorMapJoinFullOuterIntersectMultiKeyOperator extends VectorMapJoinOuterMultiKeyOperator { + + private static final long serialVersionUID = 1L; + + //------------------------------------------------------------------------------------------------ + + private static final String CLASS_NAME = VectorMapJoinFullOuterIntersectMultiKeyOperator.class.getName(); + // private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + /** Kryo ctor. */ + protected VectorMapJoinFullOuterIntersectMultiKeyOperator() { + super(); + } + + public VectorMapJoinFullOuterIntersectMultiKeyOperator(CompilationOpContext ctx) { + super(ctx); + } + + public VectorMapJoinFullOuterIntersectMultiKeyOperator(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); + + fullOuterIntersectHashTableSetup(); + } + + + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { + + try { + + /* + * Only do key match tracking here -- do not generate any results. + * + * On closeOp, generateFullOuterSmallTableNoMatches will generate the no-match Small Table + * results. + */ + + /* + * Multi-Key specific declarations. + */ + + // None.
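The lookupNoResult calls in the loops below only record which Small Table keys were touched; nothing is forwarded per row. A minimal standalone sketch of this match-tracking pattern, in which a plain HashMap and BitSet stand in for the Hive hash table and MatchTracker (keys and slot numbers are made up):

import java.util.BitSet;
import java.util.LinkedHashMap;
import java.util.Map;

public class MatchTrackingSketch {
  public static void main(String[] args) {
    // Small Table keyed entries; slot order stands in for hash table positions.
    Map<Long, Integer> smallTableSlots = new LinkedHashMap<>();
    smallTableSlots.put(5L, 0);
    smallTableSlots.put(7L, 1);
    smallTableSlots.put(9L, 2);

    BitSet matchTracker = new BitSet(smallTableSlots.size());

    // Probe phase: mark matches, generate no per-row output.
    for (long bigTableKey : new long[] { 7L, 7L, 3L }) {
      Integer slot = smallTableSlots.get(bigTableKey);
      if (slot != null) {
        matchTracker.set(slot);
      }
    }

    // Close phase: emit Small Table rows whose slot never matched
    // (these become the FULL OUTER NULL-extended results).
    for (Map.Entry<Long, Integer> e : smallTableSlots.entrySet()) {
      if (!matchTracker.get(e.getValue())) {
        System.out.println("no-match small table key: " + e.getKey()); // 5, 9
      }
    }
  }
}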
+ + if (batch.selectedInUse) { + + int[] selected = batch.selected; + final int logicalSize = batch.size; + for (int logicalIndex = 0; logicalIndex < logicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; + + keyVectorSerializeWrite.setOutput(currentKeyOutput); + keyVectorSerializeWrite.serializeWrite(batch, batchIndex); + byte[] keyBytes = currentKeyOutput.getData(); + int keyLength = currentKeyOutput.getLength(); + hashMap.lookupNoResult( + keyBytes, 0, keyLength, fullOuterIntersectReadPos, matchTracker); + } + } else { + + final int size = batch.size; + for (int batchIndex = 0; batchIndex < size; batchIndex++) { + + keyVectorSerializeWrite.setOutput(currentKeyOutput); + keyVectorSerializeWrite.serializeWrite(batch, batchIndex); + byte[] keyBytes = currentKeyOutput.getData(); + int keyLength = currentKeyOutput.getLength(); + hashMap.lookupNoResult( + keyBytes, 0, keyLength, fullOuterIntersectReadPos, matchTracker); + } + } + + } catch (IOException e) { + throw new HiveException(e); + } catch (Exception e) { + throw new HiveException(e); + } + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterIntersectStringOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterIntersectStringOperator.java new file mode 100644 index 0000000..4ec6e7c --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterIntersectStringOperator.java @@ -0,0 +1,131 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin; + +// import org.slf4j.Logger; +// import org.slf4j.LoggerFactory; +import java.io.IOException; + +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; + +/* + * Specialized class for doing the FULL OUTER MapJoin intersect phase (Small Table key match + * tracking) on a Single-Column String using a hash map.
+ */ +public class VectorMapJoinFullOuterIntersectStringOperator extends VectorMapJoinOuterStringOperator { + + private static final long serialVersionUID = 1L; + + //------------------------------------------------------------------------------------------------ + + private static final String CLASS_NAME = VectorMapJoinFullOuterIntersectStringOperator.class.getName(); + // private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + /** Kryo ctor. */ + protected VectorMapJoinFullOuterIntersectStringOperator() { + super(); + } + + public VectorMapJoinFullOuterIntersectStringOperator(CompilationOpContext ctx) { + super(ctx); + } + + public VectorMapJoinFullOuterIntersectStringOperator(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); + + fullOuterIntersectHashTableSetup(); + } + + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { + + try { + + /* + * Only do key match tracking here -- do not generate any results. + * + * On closeOp, generateFullOuterSmallTableNoMatches will generate the no-match Small Table + * results. + */ + + /* + * Single-Column String specific declarations. + */ + + // The one join column for this specialized class. + BytesColumnVector joinColVector = (BytesColumnVector) batch.cols[singleJoinColumn]; + byte[][] vector = joinColVector.vector; + int[] start = joinColVector.start; + int[] length = joinColVector.length; + + if (batch.selectedInUse) { + + int[] selected = batch.selected; + final int logicalSize = batch.size; + for (int logicalIndex = 0; logicalIndex < logicalSize; logicalIndex++) { + final int batchIndex = selected[logicalIndex]; + + byte[] keyBytes = vector[batchIndex]; + int keyStart = start[batchIndex]; + int keyLength = length[batchIndex]; + + hashMap.lookupNoResult( + keyBytes, keyStart, keyLength, fullOuterIntersectReadPos, matchTracker); + } + } else { + + final int size = batch.size; + for (int batchIndex = 0; batchIndex < size; batchIndex++) { + + byte[] keyBytes = vector[batchIndex]; + int keyStart = start[batchIndex]; + int keyLength = length[batchIndex]; + + hashMap.lookupNoResult( + keyBytes, keyStart, keyLength, fullOuterIntersectReadPos, matchTracker); + } + } + } catch (IOException e) { + throw new HiveException(e); + } catch (Exception e) { + throw new HiveException(e); + } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterLongOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterLongOperator.java new file mode 100644 index 0000000..4267f8a --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterLongOperator.java @@ -0,0 +1,70 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin; + +// import org.slf4j.Logger; +// import org.slf4j.LoggerFactory; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; + +/* + * Specialized class for doing a vectorized map join that is a full outer join on a Single-Column + * Long using a hash map. + */ +public class VectorMapJoinFullOuterLongOperator extends VectorMapJoinOuterLongOperator { + private static final long serialVersionUID = 1L; + + //------------------------------------------------------------------------------------------------ + + private static final String CLASS_NAME = VectorMapJoinFullOuterLongOperator.class.getName(); + // private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + /** Kryo ctor. */ + protected VectorMapJoinFullOuterLongOperator() { + super(); + } + + public VectorMapJoinFullOuterLongOperator(CompilationOpContext ctx) { + super(ctx); + } + + public VectorMapJoinFullOuterLongOperator(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); + + // Turn on key matching, and for the shared Small-Table case, setup forwarding to Intersect. + fullOuterHashTableSetup(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterMultiKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterMultiKeyOperator.java new file mode 100644 index 0000000..ff119e5 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterMultiKeyOperator.java @@ -0,0 +1,71 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin; + +// import org.slf4j.Logger; +// import org.slf4j.LoggerFactory; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; + +/* + * Specialized class for doing a vectorized map join that is a FULL OUTER join on Multi-Key + * using a hash map. + */ +public class VectorMapJoinFullOuterMultiKeyOperator extends VectorMapJoinOuterMultiKeyOperator { + + private static final long serialVersionUID = 1L; + + //------------------------------------------------------------------------------------------------ + + private static final String CLASS_NAME = VectorMapJoinFullOuterMultiKeyOperator.class.getName(); + // private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + /** Kryo ctor. */ + protected VectorMapJoinFullOuterMultiKeyOperator() { + super(); + } + + public VectorMapJoinFullOuterMultiKeyOperator(CompilationOpContext ctx) { + super(ctx); + } + + public VectorMapJoinFullOuterMultiKeyOperator(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); + + // Turn on key matching, and for the shared Small-Table case, setup forwarding to Intersect. + fullOuterHashTableSetup(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterStringOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterStringOperator.java new file mode 100644 index 0000000..9ab4bf2 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinFullOuterStringOperator.java @@ -0,0 +1,71 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin; + +// import org.slf4j.Logger; +// import org.slf4j.LoggerFactory; +import org.apache.hadoop.hive.ql.CompilationOpContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; + +/* + * Specialized class for doing a vectorized map join that is a full outer join on a Single-Column + * String using a hash map. + */ +public class VectorMapJoinFullOuterStringOperator extends VectorMapJoinOuterStringOperator { + + private static final long serialVersionUID = 1L; + + //------------------------------------------------------------------------------------------------ + + private static final String CLASS_NAME = VectorMapJoinFullOuterStringOperator.class.getName(); + // private static final Logger LOG = LoggerFactory.getLogger(CLASS_NAME); + + protected String getLoggingPrefix() { + return super.getLoggingPrefix(CLASS_NAME); + } + + //--------------------------------------------------------------------------- + // Pass-thru constructors. + // + + /** Kryo ctor. */ + protected VectorMapJoinFullOuterStringOperator() { + super(); + } + + public VectorMapJoinFullOuterStringOperator(CompilationOpContext ctx) { + super(ctx); + } + + public VectorMapJoinFullOuterStringOperator(CompilationOpContext ctx, OperatorDesc conf, + VectorizationContext vContext, VectorDesc vectorDesc) throws HiveException { + super(ctx, conf, vContext, vectorDesc); + } + + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); + + // Turn on key matching, and for the shared Small-Table case, setup forwarding to Intersect. + fullOuterHashTableSetup(); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java index 92ec1ee..32cb1cb 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinGenerateResultOperator.java @@ -93,9 +93,6 @@ private transient Thread ownThread; private transient int interruptCheckCounter = CHECK_INTERRUPT_PER_OVERFLOW_BATCHES; - // Debug display. - protected transient long batchCounter; - /** Kryo ctor.
*/ protected VectorMapJoinGenerateResultOperator() { super(); @@ -124,13 +121,6 @@ private void setUpInterruptChecking() { ownThread = Thread.currentThread(); } - protected void commonSetup(VectorizedRowBatch batch) throws HiveException { - super.commonSetup(batch); - - batchCounter = 0; - - } - //------------------------------------------------------------------------------------------------ protected void performValueExpressions(VectorizedRowBatch batch, @@ -157,24 +147,24 @@ protected void performValueExpressions(VectorizedRowBatch batch, batch.selectedInUse = saveSelectedInUse; } - protected void doSmallTableDeserializeRow(VectorizedRowBatch batch, int batchIndex, + protected void doSmallTableValueDeserializeRow(VectorizedRowBatch batch, int batchIndex, ByteSegmentRef byteSegmentRef, VectorMapJoinHashMapResult hashMapResult) throws HiveException { byte[] bytes = byteSegmentRef.getBytes(); int offset = (int) byteSegmentRef.getOffset(); int length = byteSegmentRef.getLength(); - smallTableVectorDeserializeRow.setBytes(bytes, offset, length); + smallTableValueVectorDeserializeRow.setBytes(bytes, offset, length); try { // Our hash tables are immutable. We can safely do by reference STRING, CHAR/VARCHAR, etc. - smallTableVectorDeserializeRow.deserializeByRef(batch, batchIndex); + smallTableValueVectorDeserializeRow.deserializeByRef(batch, batchIndex); } catch (Exception e) { throw new HiveException( "\nHashMapResult detail: " + hashMapResult.getDetailedHashMapResultPositionString() + "\nDeserializeRead detail: " + - smallTableVectorDeserializeRow.getDetailedReadPositionString(), + smallTableValueVectorDeserializeRow.getDetailedReadPositionString(), e); } } @@ -215,22 +205,23 @@ protected int generateHashMapResultSingleValue(VectorizedRowBatch batch, for (int i = 0; i < duplicateCount; i++) { - int batchIndex = allMatchs[allMatchesIndex + i]; + final int batchIndex = allMatchs[allMatchesIndex + i]; - // Outer key copying is only used when we are using the input BigTable batch as the output. - // - if (bigTableVectorCopyOuterKeys != null) { - // Copy within row. - bigTableVectorCopyOuterKeys.copyByReference(batch, batchIndex, batch, batchIndex); + if (outerSmallTableKeyVectorCopy != null) { + + // For [FULL] OUTER MapJoin, copy Big Table keys to Small Table area within + // same batch by reference. + // + outerSmallTableKeyVectorCopy.copyByReference( + batch, batchIndex, + batch, batchIndex); } - if (smallTableVectorDeserializeRow != null) { - doSmallTableDeserializeRow(batch, batchIndex, + if (smallTableValueVectorDeserializeRow != null) { + doSmallTableValueDeserializeRow(batch, batchIndex, byteSegmentRef, hashMapResult); } - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, "generateHashMapResultSingleValue big table"); - // Use the big table row as output. batch.selected[numSel++] = batchIndex; } @@ -273,26 +264,45 @@ protected void generateHashMapResultMultiValue(VectorizedRowBatch batch, for (int i = 0; i < duplicateCount; i++) { - int batchIndex = allMatchs[allMatchesIndex + i]; + final int batchIndex = allMatchs[allMatchesIndex + i]; ByteSegmentRef byteSegmentRef = hashMapResult.first(); while (byteSegmentRef != null) { // Copy the BigTable values into the overflow batch. Since the overflow batch may // not get flushed here, we must copy by value. - // Note this includes any outer join keys that need to go into the small table "area". 
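+        // (Any outer join keys headed for the Small Table area are now copied separately
+        // below by outerSmallTableKeyVectorCopy.)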
+ // if (bigTableRetainedVectorCopy != null) { - bigTableRetainedVectorCopy.copyByValue(batch, batchIndex, - overflowBatch, overflowBatch.size); + bigTableRetainedVectorCopy.copyByValue( + batch, batchIndex, + overflowBatch, overflowBatch.size); } - if (smallTableVectorDeserializeRow != null) { + if (nonOuterSmallTableKeyVectorCopy != null) { - doSmallTableDeserializeRow(overflowBatch, overflowBatch.size, - byteSegmentRef, hashMapResult); + // For non-[FULL] OUTER MapJoin, copy non-retained Big Table keys to the Big Table area + // across to overflow batch by value so Small Key projection will see its keys... + // + nonOuterSmallTableKeyVectorCopy.copyByValue( + batch, batchIndex, + overflowBatch, overflowBatch.size); + } + + if (outerSmallTableKeyVectorCopy != null) { + + // For [FULL] OUTER MapJoin, copy Big Table keys to Small Table area across + // to overflow batch by value. + // + outerSmallTableKeyVectorCopy.copyByValue( + batch, batchIndex, + overflowBatch, overflowBatch.size); } - // VectorizedBatchUtil.debugDisplayOneRow(overflowBatch, overflowBatch.size, "generateHashMapResultMultiValue overflow"); + if (smallTableValueVectorDeserializeRow != null) { + + doSmallTableValueDeserializeRow(overflowBatch, overflowBatch.size, + byteSegmentRef, hashMapResult); + } overflowBatch.size++; if (overflowBatch.size == overflowBatch.DEFAULT_SIZE) { @@ -333,8 +343,8 @@ private void generateHashMapResultLargeMultiValue(VectorizedRowBatch batch, // Fill up as much of the overflow batch as possible with small table values. while (byteSegmentRef != null) { - if (smallTableVectorDeserializeRow != null) { - doSmallTableDeserializeRow(overflowBatch, overflowBatch.size, + if (smallTableValueVectorDeserializeRow != null) { + doSmallTableValueDeserializeRow(overflowBatch, overflowBatch.size, byteSegmentRef, hashMapResult); } @@ -361,9 +371,40 @@ private void generateHashMapResultLargeMultiValue(VectorizedRowBatch batch, int batchIndex = allMatchs[allMatchesIndex + i]; if (bigTableRetainedVectorCopy != null) { + // The one big table row's values repeat. - bigTableRetainedVectorCopy.copyByReference(batch, batchIndex, overflowBatch, 0); - for (int column : bigTableRetainedMapping.getOutputColumns()) { + bigTableRetainedVectorCopy.copyByReference( + batch, batchIndex, + overflowBatch, 0); + for (int column : bigTableRetainColumnMap) { + overflowBatch.cols[column].isRepeating = true; + } + } + + if (nonOuterSmallTableKeyVectorCopy != null) { + + // For non-[FULL] OUTER MapJoin, copy non-retained Big Table keys to the Big Table area + // across to overflow batch by value so Small Key projection will see its keys... + // + nonOuterSmallTableKeyVectorCopy.copyByValue( + batch, batchIndex, + overflowBatch, 0); + for (int column : nonOuterSmallTableKeyColumnMap) { + overflowBatch.cols[column].isRepeating = true; + } + } + + int[] outerSmallTableKeyColumnMap = null; + if (outerSmallTableKeyVectorCopy != null) { + + // For [FULL] OUTER MapJoin, copy Big Table keys to Small Table area within + // to overflow batch by value. + // + outerSmallTableKeyVectorCopy.copyByValue( + batch, batchIndex, + overflowBatch, 0); + outerSmallTableKeyColumnMap = outerSmallTableKeyMapping.getOutputColumns(); + for (int column : outerSmallTableKeyColumnMap) { overflowBatch.cols[column].isRepeating = true; } } @@ -373,10 +414,20 @@ private void generateHashMapResultLargeMultiValue(VectorizedRowBatch batch, forwardOverflowNoReset(); // Hand reset the big table columns. 
- for (int column : bigTableRetainedMapping.getOutputColumns()) { + for (int column : bigTableRetainColumnMap) { + ColumnVector colVector = overflowBatch.cols[column]; + colVector.reset(); + } + for (int column : nonOuterSmallTableKeyColumnMap) { ColumnVector colVector = overflowBatch.cols[column]; colVector.reset(); } + if (outerSmallTableKeyColumnMap != null) { + for (int column : outerSmallTableKeyColumnMap) { + ColumnVector colVector = overflowBatch.cols[column]; + colVector.reset(); + } + } } byteSegmentRef = hashMapResult.next(); @@ -485,13 +536,9 @@ private void spillSerializeRow(VectorizedRowBatch batch, int batchIndex, VectorRowBytesContainer rowBytesContainer = hp.getMatchfileRowBytesContainer(); Output output = rowBytesContainer.getOuputForRowBytes(); -// int offset = output.getLength(); bigTableVectorSerializeRow.setOutputAppend(output); bigTableVectorSerializeRow.serializeWrite(batch, batchIndex); -// int length = output.getLength() - offset; rowBytesContainer.finishRow(); - -// LOG.debug("spillSerializeRow spilled batchIndex " + batchIndex + ", length " + length); } protected void spillHashMapBatch(VectorizedRowBatch batch, @@ -637,7 +684,7 @@ public void forwardBigTableBatch(VectorizedRowBatch batch) throws HiveException batch.projectionSize = outputProjection.length; batch.projectedColumns = outputProjection; - forward(batch, null, true); + vectorForward(batch); // Revert the projected columns back, because batch can be re-used by our parent operators. batch.projectionSize = originalProjectionSize; @@ -649,7 +696,7 @@ public void forwardBigTableBatch(VectorizedRowBatch batch) throws HiveException * Forward the overflow batch and reset the batch. */ protected void forwardOverflow() throws HiveException { - forward(overflowBatch, null, true); + vectorForward(overflowBatch); overflowBatch.reset(); maybeCheckInterrupt(); } @@ -666,7 +713,7 @@ private void maybeCheckInterrupt() throws HiveException { * Forward the overflow batch, but do not reset the batch. */ private void forwardOverflowNoReset() throws HiveException { - forward(overflowBatch, null, true); + vectorForward(overflowBatch); } /* @@ -679,6 +726,11 @@ private void forwardOverflowNoReset() throws HiveException { @Override public void closeOp(boolean aborted) throws HiveException { super.closeOp(aborted); + + // NOTE: The closeOp call on super MapJoinOperator can trigger Hybrid Grace additional + // NOTE: processing and also FULL OUTER MapJoin non-match Small Table result generation. So, + // NOTE: we flush the overflowBatch after the call. + // if (!aborted && overflowBatch.size > 0) { forwardOverflow(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyGenerateResultOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyGenerateResultOperator.java index f791d95..35ddddd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyGenerateResultOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyGenerateResultOperator.java @@ -103,25 +103,25 @@ public VectorMapJoinInnerBigOnlyGenerateResultOperator(CompilationOpContext ctx, /* * Setup our inner big table only join specific members. */ - protected void commonSetup(VectorizedRowBatch batch) throws HiveException { - super.commonSetup(batch); + protected void commonSetup() throws HiveException { + super.commonSetup(); // Inner big-table only join specific. 
VectorMapJoinHashMultiSet baseHashMultiSet = (VectorMapJoinHashMultiSet) vectorMapJoinHashTable; - hashMultiSetResults = new VectorMapJoinHashMultiSetResult[batch.DEFAULT_SIZE]; + hashMultiSetResults = new VectorMapJoinHashMultiSetResult[VectorizedRowBatch.DEFAULT_SIZE]; for (int i = 0; i < hashMultiSetResults.length; i++) { hashMultiSetResults[i] = baseHashMultiSet.createHashMultiSetResult(); } - allMatchs = new int[batch.DEFAULT_SIZE]; + allMatchs = new int[VectorizedRowBatch.DEFAULT_SIZE]; - equalKeySeriesValueCounts = new long[batch.DEFAULT_SIZE]; - equalKeySeriesAllMatchIndices = new int[batch.DEFAULT_SIZE]; - equalKeySeriesDuplicateCounts = new int[batch.DEFAULT_SIZE]; + equalKeySeriesValueCounts = new long[VectorizedRowBatch.DEFAULT_SIZE]; + equalKeySeriesAllMatchIndices = new int[VectorizedRowBatch.DEFAULT_SIZE]; + equalKeySeriesDuplicateCounts = new int[VectorizedRowBatch.DEFAULT_SIZE]; - spills = new int[batch.DEFAULT_SIZE]; - spillHashMapResultIndices = new int[batch.DEFAULT_SIZE]; + spills = new int[VectorizedRowBatch.DEFAULT_SIZE]; + spillHashMapResultIndices = new int[VectorizedRowBatch.DEFAULT_SIZE]; } //----------------------------------------------------------------------------------------------- diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java index 678fa42..30a19b8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyLongOperator.java @@ -102,45 +102,36 @@ public VectorMapJoinInnerBigOnlyLongOperator(CompilationOpContext ctx, OperatorD // @Override - public void process(Object row, int tag) throws HiveException { + protected void commonSetup() throws HiveException { + super.commonSetup(); - try { - VectorizedRowBatch batch = (VectorizedRowBatch) row; - - alias = (byte) tag; - - if (needCommonSetup) { - // Our one time process method initialization. - commonSetup(batch); - - /* - * Initialize Single-Column Long members for this specialized class. - */ + /* + * Initialize Single-Column Long members for this specialized class. + */ - singleJoinColumn = bigTableKeyColumnMap[0]; - - needCommonSetup = false; - } - - if (needHashTableSetup) { - // Setup our hash table specialization. It will be the first time the process - // method is called, or after a Hybrid Grace reload. - - /* - * Get our Single-Column Long hash multi-set information for this specialized class. - */ + singleJoinColumn = bigTableKeyColumnMap[0]; + } - hashMultiSet = (VectorMapJoinLongHashMultiSet) vectorMapJoinHashTable; - useMinMax = hashMultiSet.useMinMax(); - if (useMinMax) { - min = hashMultiSet.min(); - max = hashMultiSet.max(); - } + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); + + /* + * Get our Single-Column Long hash multi-set information for this specialized class. + */ + + hashMultiSet = (VectorMapJoinLongHashMultiSet) vectorMapJoinHashTable; + useMinMax = hashMultiSet.useMinMax(); + if (useMinMax) { + min = hashMultiSet.min(); + max = hashMultiSet.max(); + } + } - needHashTableSetup = false; - } + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { - batchCounter++; + try { // Do the per-batch setup for an inner big-only join. 
@@ -153,11 +144,7 @@ public void process(Object row, int tag) throws HiveException {
       }
       final int inputLogicalSize = batch.size;
-
       if (inputLogicalSize == 0) {
-        if (LOG.isDebugEnabled()) {
-          LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty");
-        }
         return;
       }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java
index 866aa60..f587517 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyMultiKeyOperator.java
@@ -31,7 +31,6 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.OperatorDesc;
-
 import org.apache.hadoop.hive.ql.plan.VectorDesc;
 
 // Multi-Key hash table import.
 import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMultiSet;
@@ -109,45 +108,37 @@ public VectorMapJoinInnerBigOnlyMultiKeyOperator(CompilationOpContext ctx, Opera
   //
 
   @Override
-  public void process(Object row, int tag) throws HiveException {
-
-    try {
-      VectorizedRowBatch batch = (VectorizedRowBatch) row;
+  protected void commonSetup() throws HiveException {
+    super.commonSetup();
 
-      alias = (byte) tag;
+    /*
+     * Initialize Multi-Key members for this specialized class.
+     */
 
-      if (needCommonSetup) {
-        // Our one time process method initialization.
-        commonSetup(batch);
-
-        /*
-         * Initialize Multi-Key members for this specialized class.
-         */
+    keyVectorSerializeWrite = new VectorSerializeRow(
+        new BinarySortableSerializeWrite(bigTableKeyColumnMap.length));
+    keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap);
 
-        keyVectorSerializeWrite = new VectorSerializeRow(
-            new BinarySortableSerializeWrite(bigTableKeyColumnMap.length));
-        keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap);
+    currentKeyOutput = new Output();
+    saveKeyOutput = new Output();
+  }
 
-        currentKeyOutput = new Output();
-        saveKeyOutput = new Output();
+  @Override
+  public void hashTableSetup() throws HiveException {
+    super.hashTableSetup();
 
-        needCommonSetup = false;
-      }
+    /*
+     * Get our Multi-Key hash multi-set information for this specialized class.
+     */
 
-      if (needHashTableSetup) {
-        // Setup our hash table specialization.  It will be the first time the process
-        // method is called, or after a Hybrid Grace reload.
-
-        /*
-         * Get our Multi-Key hash multi-set information for this specialized class.
-         */
-
-        hashMultiSet = (VectorMapJoinBytesHashMultiSet) vectorMapJoinHashTable;
+    hashMultiSet = (VectorMapJoinBytesHashMultiSet) vectorMapJoinHashTable;
+  }
 
-      needHashTableSetup = false;
-    }
+  @Override
+  public void processBatch(VectorizedRowBatch batch) throws HiveException {
 
-      batchCounter++;
+    try {
 
       // Do the per-batch setup for an inner big-only join.
@@ -160,11 +154,7 @@ public void process(Object row, int tag) throws HiveException { } final int inputLogicalSize = batch.size; - if (inputLogicalSize == 0) { - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); - } return; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java index a0c3b9c..e373db1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerBigOnlyStringOperator.java @@ -31,7 +31,6 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; - import org.apache.hadoop.hive.ql.plan.VectorDesc; // Single-Column String hash table import. import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMultiSet; @@ -98,40 +97,31 @@ public VectorMapJoinInnerBigOnlyStringOperator(CompilationOpContext ctx, Operato // @Override - public void process(Object row, int tag) throws HiveException { - - try { - VectorizedRowBatch batch = (VectorizedRowBatch) row; - - alias = (byte) tag; - - if (needCommonSetup) { - // Our one time process method initialization. - commonSetup(batch); + protected void commonSetup() throws HiveException { + super.commonSetup(); - /* - * Initialize Single-Column String members for this specialized class. - */ - - singleJoinColumn = bigTableKeyColumnMap[0]; + /* + * Initialize Single-Column String members for this specialized class. + */ - needCommonSetup = false; - } + singleJoinColumn = bigTableKeyColumnMap[0]; + } - if (needHashTableSetup) { - // Setup our hash table specialization. It will be the first time the process - // method is called, or after a Hybrid Grace reload. + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); - /* - * Get our Single-Column String hash multi-set information for this specialized class. - */ + /* + * Get our Single-Column String hash multi-set information for this specialized class. + */ - hashMultiSet = (VectorMapJoinBytesHashMultiSet) vectorMapJoinHashTable; + hashMultiSet = (VectorMapJoinBytesHashMultiSet) vectorMapJoinHashTable; + } - needHashTableSetup = false; - } + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { - batchCounter++; + try { // Do the per-batch setup for an inner big-only join. 
@@ -144,11 +134,7 @@ public void process(Object row, int tag) throws HiveException { } final int inputLogicalSize = batch.size; - if (inputLogicalSize == 0) { - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); - } return; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerGenerateResultOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerGenerateResultOperator.java index ea2c04d..dc5d046 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerGenerateResultOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerGenerateResultOperator.java @@ -108,26 +108,26 @@ public VectorMapJoinInnerGenerateResultOperator(CompilationOpContext ctx, Operat /* * Setup our inner join specific members. */ - protected void commonSetup(VectorizedRowBatch batch) throws HiveException { - super.commonSetup(batch); + protected void commonSetup() throws HiveException { + super.commonSetup(); // Inner join specific. VectorMapJoinHashMap baseHashMap = (VectorMapJoinHashMap) vectorMapJoinHashTable; - hashMapResults = new VectorMapJoinHashMapResult[batch.DEFAULT_SIZE]; + hashMapResults = new VectorMapJoinHashMapResult[VectorizedRowBatch.DEFAULT_SIZE]; for (int i = 0; i < hashMapResults.length; i++) { hashMapResults[i] = baseHashMap.createHashMapResult(); } - allMatchs = new int[batch.DEFAULT_SIZE]; + allMatchs = new int[VectorizedRowBatch.DEFAULT_SIZE]; - equalKeySeriesHashMapResultIndices = new int[batch.DEFAULT_SIZE]; - equalKeySeriesAllMatchIndices = new int[batch.DEFAULT_SIZE]; - equalKeySeriesIsSingleValue = new boolean[batch.DEFAULT_SIZE]; - equalKeySeriesDuplicateCounts = new int[batch.DEFAULT_SIZE]; + equalKeySeriesHashMapResultIndices = new int[VectorizedRowBatch.DEFAULT_SIZE]; + equalKeySeriesAllMatchIndices = new int[VectorizedRowBatch.DEFAULT_SIZE]; + equalKeySeriesIsSingleValue = new boolean[VectorizedRowBatch.DEFAULT_SIZE]; + equalKeySeriesDuplicateCounts = new int[VectorizedRowBatch.DEFAULT_SIZE]; - spills = new int[batch.DEFAULT_SIZE]; - spillHashMapResultIndices = new int[batch.DEFAULT_SIZE]; + spills = new int[VectorizedRowBatch.DEFAULT_SIZE]; + spillHashMapResultIndices = new int[VectorizedRowBatch.DEFAULT_SIZE]; } /* @@ -142,7 +142,7 @@ protected void innerPerBatchSetup(VectorizedRowBatch batch) { // For join operators that can generate small table results, reset their // (target) scratch columns. - for (int column : smallTableOutputVectorColumns) { + for (int column : smallTableValueColumnMap) { ColumnVector smallTableColumn = batch.cols[column]; smallTableColumn.reset(); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java index 36404bc..5ac606a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerLongOperator.java @@ -30,7 +30,6 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; - import org.apache.hadoop.hive.ql.plan.VectorDesc; // Single-Column Long hash table import. 
import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinLongHashMap; @@ -101,45 +100,36 @@ public VectorMapJoinInnerLongOperator(CompilationOpContext ctx, OperatorDesc con // @Override - public void process(Object row, int tag) throws HiveException { - - try { - VectorizedRowBatch batch = (VectorizedRowBatch) row; - - alias = (byte) tag; - - if (needCommonSetup) { - // Our one time process method initialization. - commonSetup(batch); - - /* - * Initialize Single-Column Long members for this specialized class. - */ - - singleJoinColumn = bigTableKeyColumnMap[0]; + protected void commonSetup() throws HiveException { + super.commonSetup(); - needCommonSetup = false; - } - - if (needHashTableSetup) { - // Setup our hash table specialization. It will be the first time the process - // method is called, or after a Hybrid Grace reload. + /* + * Initialize Single-Column Long members for this specialized class. + */ - /* - * Get our Single-Column Long hash map information for this specialized class. - */ + singleJoinColumn = bigTableKeyColumnMap[0]; + } - hashMap = (VectorMapJoinLongHashMap) vectorMapJoinHashTable; - useMinMax = hashMap.useMinMax(); - if (useMinMax) { - min = hashMap.min(); - max = hashMap.max(); - } + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); + + /* + * Get our Single-Column Long hash map information for this specialized class. + */ + + hashMap = (VectorMapJoinLongHashMap) vectorMapJoinHashTable; + useMinMax = hashMap.useMinMax(); + if (useMinMax) { + min = hashMap.min(); + max = hashMap.max(); + } + } - needHashTableSetup = false; - } + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { - batchCounter++; + try { // Do the per-batch setup for an inner join. @@ -151,11 +141,7 @@ public void process(Object row, int tag) throws HiveException { } final int inputLogicalSize = batch.size; - if (inputLogicalSize == 0) { - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); - } return; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java index 620101f..cdee3fd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerMultiKeyOperator.java @@ -30,7 +30,6 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; - import org.apache.hadoop.hive.ql.plan.VectorDesc; // Multi-Key hash table import. import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMap; @@ -107,45 +106,36 @@ public VectorMapJoinInnerMultiKeyOperator(CompilationOpContext ctx, OperatorDesc // @Override - public void process(Object row, int tag) throws HiveException { - - try { - VectorizedRowBatch batch = (VectorizedRowBatch) row; - - alias = (byte) tag; - - if (needCommonSetup) { - // Our one time process method initialization. - commonSetup(batch); + protected void commonSetup() throws HiveException { + super.commonSetup(); - /* - * Initialize Multi-Key members for this specialized class. 
- */ - - keyVectorSerializeWrite = new VectorSerializeRow( - new BinarySortableSerializeWrite(bigTableKeyColumnMap.length)); - keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap); + /* + * Initialize Multi-Key members for this specialized class. + */ - currentKeyOutput = new Output(); - saveKeyOutput = new Output(); + keyVectorSerializeWrite = new VectorSerializeRow( + new BinarySortableSerializeWrite(bigTableKeyColumnMap.length)); + keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap); - needCommonSetup = false; - } + currentKeyOutput = new Output(); + saveKeyOutput = new Output(); + } - if (needHashTableSetup) { - // Setup our hash table specialization. It will be the first time the process - // method is called, or after a Hybrid Grace reload. + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); - /* - * Get our Multi-Key hash map information for this specialized class. - */ + /* + * Get our Multi-Key hash map information for this specialized class. + */ - hashMap = (VectorMapJoinBytesHashMap) vectorMapJoinHashTable; + hashMap = (VectorMapJoinBytesHashMap) vectorMapJoinHashTable; + } - needHashTableSetup = false; - } + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { - batchCounter++; + try { // Do the per-batch setup for an inner join. @@ -157,11 +147,7 @@ public void process(Object row, int tag) throws HiveException { } final int inputLogicalSize = batch.size; - if (inputLogicalSize == 0) { - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); - } return; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java index d99d514..8e6697e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinInnerStringOperator.java @@ -30,7 +30,6 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; - import org.apache.hadoop.hive.ql.plan.VectorDesc; // Single-Column String hash table import. import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMap; @@ -97,40 +96,31 @@ public VectorMapJoinInnerStringOperator(CompilationOpContext ctx, OperatorDesc c // @Override - public void process(Object row, int tag) throws HiveException { - - try { - VectorizedRowBatch batch = (VectorizedRowBatch) row; - - alias = (byte) tag; - - if (needCommonSetup) { - // Our one time process method initialization. - commonSetup(batch); + protected void commonSetup() throws HiveException { + super.commonSetup(); - /* - * Initialize Single-Column String members for this specialized class. - */ - - singleJoinColumn = bigTableKeyColumnMap[0]; + /* + * Initialize Single-Column String members for this specialized class. + */ - needCommonSetup = false; - } + singleJoinColumn = bigTableKeyColumnMap[0]; + } - if (needHashTableSetup) { - // Setup our hash table specialization. It will be the first time the process - // method is called, or after a Hybrid Grace reload. + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); - /* - * Get our Single-Column String hash map information for this specialized class. 
- */ + /* + * Get our Single-Column String hash map information for this specialized class. + */ - hashMap = (VectorMapJoinBytesHashMap) vectorMapJoinHashTable; + hashMap = (VectorMapJoinBytesHashMap) vectorMapJoinHashTable; + } - needHashTableSetup = false; - } + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { - batchCounter++; + try { // Do the per-batch setup for an inner join. @@ -142,11 +132,7 @@ public void process(Object row, int tag) throws HiveException { } final int inputLogicalSize = batch.size; - if (inputLogicalSize == 0) { - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); - } return; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiGenerateResultOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiGenerateResultOperator.java index f68d4c4..71ec56b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiGenerateResultOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiGenerateResultOperator.java @@ -89,21 +89,21 @@ public VectorMapJoinLeftSemiGenerateResultOperator(CompilationOpContext ctx, Ope /* * Setup our left semi join specific members. */ - protected void commonSetup(VectorizedRowBatch batch) throws HiveException { - super.commonSetup(batch); + protected void commonSetup() throws HiveException { + super.commonSetup(); // Semi join specific. VectorMapJoinHashSet baseHashSet = (VectorMapJoinHashSet) vectorMapJoinHashTable; - hashSetResults = new VectorMapJoinHashSetResult[batch.DEFAULT_SIZE]; + hashSetResults = new VectorMapJoinHashSetResult[VectorizedRowBatch.DEFAULT_SIZE]; for (int i = 0; i < hashSetResults.length; i++) { hashSetResults[i] = baseHashSet.createHashSetResult(); } - allMatchs = new int[batch.DEFAULT_SIZE]; + allMatchs = new int[VectorizedRowBatch.DEFAULT_SIZE]; - spills = new int[batch.DEFAULT_SIZE]; - spillHashMapResultIndices = new int[batch.DEFAULT_SIZE]; + spills = new int[VectorizedRowBatch.DEFAULT_SIZE]; + spillHashMapResultIndices = new int[VectorizedRowBatch.DEFAULT_SIZE]; } //----------------------------------------------------------------------------------------------- diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java index 4185c5b..40e7cfa 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiLongOperator.java @@ -31,7 +31,6 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; - import org.apache.hadoop.hive.ql.plan.VectorDesc; // Single-Column Long hash table import. import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinLongHashSet; @@ -102,45 +101,36 @@ public VectorMapJoinLeftSemiLongOperator(CompilationOpContext ctx, OperatorDesc // @Override - public void process(Object row, int tag) throws HiveException { - - try { - VectorizedRowBatch batch = (VectorizedRowBatch) row; - - alias = (byte) tag; - - if (needCommonSetup) { - // Our one time process method initialization. 
- commonSetup(batch); - - /* - * Initialize Single-Column Long members for this specialized class. - */ - - singleJoinColumn = bigTableKeyColumnMap[0]; + protected void commonSetup() throws HiveException { + super.commonSetup(); - needCommonSetup = false; - } - - if (needHashTableSetup) { - // Setup our hash table specialization. It will be the first time the process - // method is called, or after a Hybrid Grace reload. + /* + * Initialize Single-Column Long members for this specialized class. + */ - /* - * Get our Single-Column Long hash set information for this specialized class. - */ + singleJoinColumn = bigTableKeyColumnMap[0]; + } - hashSet = (VectorMapJoinLongHashSet) vectorMapJoinHashTable; - useMinMax = hashSet.useMinMax(); - if (useMinMax) { - min = hashSet.min(); - max = hashSet.max(); - } + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); + + /* + * Get our Single-Column Long hash set information for this specialized class. + */ + + hashSet = (VectorMapJoinLongHashSet) vectorMapJoinHashTable; + useMinMax = hashSet.useMinMax(); + if (useMinMax) { + min = hashSet.min(); + max = hashSet.max(); + } + } - needHashTableSetup = false; - } + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { - batchCounter++; + try { // Do the per-batch setup for an left semi join. @@ -153,11 +143,7 @@ public void process(Object row, int tag) throws HiveException { } final int inputLogicalSize = batch.size; - if (inputLogicalSize == 0) { - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); - } return; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java index 541e7fa..e5d9fda 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiMultiKeyOperator.java @@ -31,7 +31,6 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; - import org.apache.hadoop.hive.ql.plan.VectorDesc; // Multi-Key hash table import. import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashSet; @@ -108,45 +107,36 @@ public VectorMapJoinLeftSemiMultiKeyOperator(CompilationOpContext ctx, OperatorD // @Override - public void process(Object row, int tag) throws HiveException { - - try { - VectorizedRowBatch batch = (VectorizedRowBatch) row; - - alias = (byte) tag; - - if (needCommonSetup) { - // Our one time process method initialization. - commonSetup(batch); + protected void commonSetup() throws HiveException { + super.commonSetup(); - /* - * Initialize Multi-Key members for this specialized class. - */ - - keyVectorSerializeWrite = new VectorSerializeRow( - new BinarySortableSerializeWrite(bigTableKeyColumnMap.length)); - keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap); + /* + * Initialize Multi-Key members for this specialized class. 
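+     * The serialized key bytes go into reusable Output buffers (currentKeyOutput and
+     * saveKeyOutput) so runs of equal keys can be detected by comparing the two buffers.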
+ */ - currentKeyOutput = new Output(); - saveKeyOutput = new Output(); + keyVectorSerializeWrite = new VectorSerializeRow( + new BinarySortableSerializeWrite(bigTableKeyColumnMap.length)); + keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap); - needCommonSetup = false; - } + currentKeyOutput = new Output(); + saveKeyOutput = new Output(); + } - if (needHashTableSetup) { - // Setup our hash table specialization. It will be the first time the process - // method is called, or after a Hybrid Grace reload. + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); - /* - * Get our Multi-Key hash set information for this specialized class. - */ + /* + * Get our Multi-Key hash set information for this specialized class. + */ - hashSet = (VectorMapJoinBytesHashSet) vectorMapJoinHashTable; + hashSet = (VectorMapJoinBytesHashSet) vectorMapJoinHashTable; + } - needHashTableSetup = false; - } + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { - batchCounter++; + try { // Do the per-batch setup for an left semi join. @@ -159,11 +149,7 @@ public void process(Object row, int tag) throws HiveException { } final int inputLogicalSize = batch.size; - if (inputLogicalSize == 0) { - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); - } return; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java index 6785bce..df900a1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinLeftSemiStringOperator.java @@ -31,7 +31,6 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; - import org.apache.hadoop.hive.ql.plan.VectorDesc; // Single-Column String hash table import. import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashSet; @@ -98,40 +97,31 @@ public VectorMapJoinLeftSemiStringOperator(CompilationOpContext ctx, OperatorDes // @Override - public void process(Object row, int tag) throws HiveException { - - try { - VectorizedRowBatch batch = (VectorizedRowBatch) row; - - alias = (byte) tag; - - if (needCommonSetup) { - // Our one time process method initialization. - commonSetup(batch); + protected void commonSetup() throws HiveException { + super.commonSetup(); - /* - * Initialize Single-Column String members for this specialized class. - */ - - singleJoinColumn = bigTableKeyColumnMap[0]; + /* + * Initialize Single-Column String members for this specialized class. + */ - needCommonSetup = false; - } + singleJoinColumn = bigTableKeyColumnMap[0]; + } - if (needHashTableSetup) { - // Setup our hash table specialization. It will be the first time the process - // method is called, or after a Hybrid Grace reload. + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); - /* - * Get our Single-Column String hash set information for this specialized class. - */ + /* + * Get our Single-Column String hash set information for this specialized class. 
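+     * (The shared hash table was already built from the Small Table; this simply narrows
+     * it to the specialized hash set interface.)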
+ */ - hashSet = (VectorMapJoinBytesHashSet) vectorMapJoinHashTable; + hashSet = (VectorMapJoinBytesHashSet) vectorMapJoinHashTable; + } - needHashTableSetup = false; - } + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { - batchCounter++; + try { // Do the per-batch setup for an left semi join. @@ -144,11 +134,7 @@ public void process(Object row, int tag) throws HiveException { } final int inputLogicalSize = batch.size; - if (inputLogicalSize == 0) { - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); - } return; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java index 8a6c817..10daf77 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterGenerateResultOperator.java @@ -24,16 +24,23 @@ import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.CompilationOpContext; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMap; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMap; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinLongHashMap; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.VectorDesc; +import org.apache.hadoop.hive.serde2.WriteBuffers; import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; /** @@ -114,6 +121,18 @@ protected transient int[] noMatchs; protected transient int[] merged; + /* + * Small Table key match tracking used for FULL OUTER MapJoin. Otherwise, null. + * Since the Small Table hash table can be shared, we need this non-shared private object for + * our key match tracking. + */ + protected MatchTracker matchTracker; + + protected transient boolean isFullOuterForwardKeysToIntersect; + protected transient WriteBuffers.Position fullOuterIntersectReadPos; + + protected transient int[] fullOuterForwardKeys; + /** Kryo ctor. */ protected VectorMapJoinOuterGenerateResultOperator() { super(); @@ -131,32 +150,37 @@ public VectorMapJoinOuterGenerateResultOperator(CompilationOpContext ctx, Operat /* * Setup our outer join specific members. */ - protected void commonSetup(VectorizedRowBatch batch) throws HiveException { - super.commonSetup(batch); + protected void commonSetup() throws HiveException { + super.commonSetup(); // Outer join specific. 
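+    // Scratch arrays below are sized with the class constant VectorizedRowBatch.DEFAULT_SIZE,
+    // since commonSetup no longer receives a live batch.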
VectorMapJoinHashMap baseHashMap = (VectorMapJoinHashMap) vectorMapJoinHashTable; - hashMapResults = new VectorMapJoinHashMapResult[batch.DEFAULT_SIZE]; + hashMapResults = new VectorMapJoinHashMapResult[VectorizedRowBatch.DEFAULT_SIZE]; for (int i = 0; i < hashMapResults.length; i++) { hashMapResults[i] = baseHashMap.createHashMapResult(); } - inputSelected = new int[batch.DEFAULT_SIZE]; + inputSelected = new int[VectorizedRowBatch.DEFAULT_SIZE]; - allMatchs = new int[batch.DEFAULT_SIZE]; + allMatchs = new int[VectorizedRowBatch.DEFAULT_SIZE]; - equalKeySeriesHashMapResultIndices = new int[batch.DEFAULT_SIZE]; - equalKeySeriesAllMatchIndices = new int[batch.DEFAULT_SIZE]; - equalKeySeriesIsSingleValue = new boolean[batch.DEFAULT_SIZE]; - equalKeySeriesDuplicateCounts = new int[batch.DEFAULT_SIZE]; + equalKeySeriesHashMapResultIndices = new int[VectorizedRowBatch.DEFAULT_SIZE]; + equalKeySeriesAllMatchIndices = new int[VectorizedRowBatch.DEFAULT_SIZE]; + equalKeySeriesIsSingleValue = new boolean[VectorizedRowBatch.DEFAULT_SIZE]; + equalKeySeriesDuplicateCounts = new int[VectorizedRowBatch.DEFAULT_SIZE]; - spills = new int[batch.DEFAULT_SIZE]; - spillHashMapResultIndices = new int[batch.DEFAULT_SIZE]; + spills = new int[VectorizedRowBatch.DEFAULT_SIZE]; + spillHashMapResultIndices = new int[VectorizedRowBatch.DEFAULT_SIZE]; - nonSpills = new int[batch.DEFAULT_SIZE]; - noMatchs = new int[batch.DEFAULT_SIZE]; - merged = new int[batch.DEFAULT_SIZE]; + nonSpills = new int[VectorizedRowBatch.DEFAULT_SIZE]; + noMatchs = new int[VectorizedRowBatch.DEFAULT_SIZE]; + merged = new int[VectorizedRowBatch.DEFAULT_SIZE]; + + matchTracker = null; + isFullOuterForwardKeysToIntersect = false; + fullOuterIntersectReadPos = null; + fullOuterForwardKeys = null; } @@ -174,15 +198,16 @@ protected void outerPerBatchSetup(VectorizedRowBatch batch) { // For join operators that can generate small table results, reset their // (target) scratch columns. - for (int column : smallTableOutputVectorColumns) { + for (int column : outerSmallTableKeyColumnMap) { + ColumnVector bigTableOuterKeyColumn = batch.cols[column]; + bigTableOuterKeyColumn.reset(); + } + + for (int column : smallTableValueColumnMap) { ColumnVector smallTableColumn = batch.cols[column]; smallTableColumn.reset(); } - for (int column : bigTableOuterKeyOutputVectorColumns) { - ColumnVector bigTableOuterKeyColumn = batch.cols[column]; - bigTableOuterKeyColumn.reset(); - } } /** @@ -569,27 +594,28 @@ public void finishOuter(VectorizedRowBatch batch, protected void generateOuterNulls(VectorizedRowBatch batch, int[] noMatchs, int noMatchSize) throws IOException, HiveException { - // Set null information in the small table results area. + // Set null information in the small table results area. - for (int i = 0; i < noMatchSize; i++) { - int batchIndex = noMatchs[i]; + for (int i = 0; i < noMatchSize; i++) { + int batchIndex = noMatchs[i]; - // Mark any scratch small table scratch columns that would normally receive a copy of the - // key as null, too. - for (int column : bigTableOuterKeyOutputVectorColumns) { - ColumnVector colVector = batch.cols[column]; - colVector.noNulls = false; - colVector.isNull[batchIndex] = true; - } + // Mark any scratch small table scratch columns that would normally receive a copy of the + // key as null, too. + // + for (int column : outerSmallTableKeyColumnMap) { + ColumnVector colVector = batch.cols[column]; + colVector.noNulls = false; + colVector.isNull[batchIndex] = true; + } - // Small table values are set to null. 
- for (int column : smallTableOutputVectorColumns) { - ColumnVector colVector = batch.cols[column]; - colVector.noNulls = false; - colVector.isNull[batchIndex] = true; - } - } - } + // Small table values are set to null. + for (int column : smallTableValueColumnMap) { + ColumnVector colVector = batch.cols[column]; + colVector.noNulls = false; + colVector.isNull[batchIndex] = true; + } + } + } /** * Generate the outer join output results for one vectorized row batch with a repeated key. @@ -734,20 +760,345 @@ public void finishOuterRepeated(VectorizedRowBatch batch, JoinUtil.JoinResult jo */ protected void generateOuterNullsRepeatedAll(VectorizedRowBatch batch) throws HiveException { - for (int column : smallTableOutputVectorColumns) { + // Mark any scratch small table scratch columns that would normally receive a copy of the + // key as null, too. + // + for (int column : outerSmallTableKeyColumnMap) { ColumnVector colVector = batch.cols[column]; colVector.noNulls = false; colVector.isNull[0] = true; colVector.isRepeating = true; } - // Mark any scratch small table scratch columns that would normally receive a copy of the key - // as null, too. - for (int column : bigTableOuterKeyOutputVectorColumns) { + for (int column : smallTableValueColumnMap) { ColumnVector colVector = batch.cols[column]; colVector.noNulls = false; colVector.isNull[0] = true; colVector.isRepeating = true; } } + + private void markBigTableColumnsAsNullRepeating() { + + /* + * For non-match FULL OUTER Small Table results, the Big Table columns are all NULL. + */ + for (int column : bigTableRetainColumnMap) { + ColumnVector colVector = overflowBatch.cols[column]; + colVector.isRepeating = true; + colVector.noNulls = false; + colVector.isNull[0] = true; + } + } + + @Override + protected void generateFullOuterSmallTableNoMatches() throws HiveException { + + if (!conf.isDynamicPartitionHashJoin() && !conf.isFullOuterIntersect()) { + + // The FULL OUTER MapJoin INTERSECT operator does the non-match Small Table + // result work. + return; + } + + /* + * For dynamic partition hash join, both the Big Table and Small Table are partitioned (sent) + * to the Reducer using the key hash code. So, we can generate the non-match Small Table + * results locally. + * + * Or, for Intersect, we have been tracking the matched keys received from all the FULL OUTER + * MapJoin operators. So, we can generate the non-match Small Table results in this + * centralized operator. + * + * Scan the Small Table for keys that didn't match and generate the non-matchs into the + * overflowBatch. + */ + + /* + * If there were no matched keys sent, we need to do our common initialization. + */ + if (needCommonSetup) { + + // Our one time process method initialization. + commonSetup(); + + needCommonSetup = false; + } + + if (needHashTableSetup) { + + // Setup our hash table specialization. It will be the first time the process + // method is called, or after a Hybrid Grace reload. + + hashTableSetup(); + + needHashTableSetup = false; + } + + /* + * To support fancy NULL repeating columns, let's flush the overflowBatch if it has anything. 
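+     * Rows already buffered in overflowBatch carry real Big Table values, so they must be
+     * forwarded before markBigTableColumnsAsNullRepeating marks those columns as repeating
+     * NULL for the non-match results that follow.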
+ */ + if (overflowBatch.size > 0) { + forwardOverflow(); + } + markBigTableColumnsAsNullRepeating(); + + switch (hashTableKeyType) { + case BOOLEAN: + case BYTE: + case SHORT: + case INT: + case LONG: + generateFullOuterLongKeySmallTableNoMatches(); + break; + case STRING: + generateFullOuterStringKeySmallTableNoMatches(); + break; + case MULTI_KEY: + generateFullOuterMultiKeySmallTableNoMatches(); + break; + default: + throw new RuntimeException("Unexpected hash table key type " + hashTableKeyType); + } + } + + protected void generateFullOuterLongKeySmallTableNoMatches() + throws HiveException { + + final LongColumnVector singleSmallTableKeyOutputColumnVector; + if (allSmallTableKeyColumnIncluded[0]) { + singleSmallTableKeyOutputColumnVector = + (LongColumnVector) overflowBatch.cols[allSmallTableKeyColumnNums[0]]; + } else { + singleSmallTableKeyOutputColumnVector = null; + } + + VectorMapJoinLongHashMap hashMap = (VectorMapJoinLongHashMap) vectorMapJoinHashTable; + + VectorMapJoinNonMatchedIterator nonMatchedIterator = + hashMap.createNonMatchedIterator(matchTracker); + nonMatchedIterator.init(); + while (nonMatchedIterator.findNextNonMatched()) { + + final long longKey; + boolean isKeyNull = !nonMatchedIterator.readNonMatchedLongKey(); + if (!isKeyNull) { + longKey = nonMatchedIterator.getNonMatchedLongKey(); + } else { + longKey = 0; + } + + VectorMapJoinHashMapResult hashMapResult = nonMatchedIterator.getNonMatchedHashMapResult(); + + ByteSegmentRef byteSegmentRef = hashMapResult.first(); + while (byteSegmentRef != null) { + + // NOTE: Big Table result columns were marked repeating NULL already. + + if (singleSmallTableKeyOutputColumnVector != null) { + if (isKeyNull) { + singleSmallTableKeyOutputColumnVector.isNull[overflowBatch.size] = true; + singleSmallTableKeyOutputColumnVector.noNulls = false; + } else { + singleSmallTableKeyOutputColumnVector.vector[overflowBatch.size] = longKey; + singleSmallTableKeyOutputColumnVector.isNull[overflowBatch.size] = false; + } + } + + if (smallTableValueVectorDeserializeRow != null) { + + doSmallTableValueDeserializeRow(overflowBatch, overflowBatch.size, + byteSegmentRef, hashMapResult); + } + + overflowBatch.size++; + if (overflowBatch.size == overflowBatch.DEFAULT_SIZE) { + forwardOverflow(); + markBigTableColumnsAsNullRepeating(); + } + byteSegmentRef = hashMapResult.next(); + } + } + } + + private void doSmallTableKeyDeserializeRow(VectorizedRowBatch batch, int batchIndex, + byte[] keyBytes, int keyOffset, int keyLength) + throws HiveException { + + smallTableKeyOuterVectorDeserializeRow.setBytes(keyBytes, keyOffset, keyLength); + + try { + // Our hash tables are immutable. We can safely do by reference STRING, CHAR/VARCHAR, etc. 
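+      // (deserializeByRef points the output column vectors at the key bytes rather than
+      // copying them.)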
+ smallTableKeyOuterVectorDeserializeRow.deserializeByRef(batch, batchIndex); + } catch (Exception e) { + throw new HiveException( + "\nDeserializeRead detail: " + + smallTableKeyOuterVectorDeserializeRow.getDetailedReadPositionString(), + e); + } + } + + protected void generateFullOuterMultiKeySmallTableNoMatches() throws HiveException { + + VectorMapJoinBytesHashMap hashMap = (VectorMapJoinBytesHashMap) vectorMapJoinHashTable; + + VectorMapJoinNonMatchedIterator nonMatchedIterator = + hashMap.createNonMatchedIterator(matchTracker); + nonMatchedIterator.init(); + while (nonMatchedIterator.findNextNonMatched()) { + + nonMatchedIterator.readNonMatchedBytesKey(); + byte[] keyBytes = nonMatchedIterator.getNonMatchedBytes(); + final int keyOffset = nonMatchedIterator.getNonMatchedBytesOffset(); + final int keyLength = nonMatchedIterator.getNonMatchedBytesLength(); + + VectorMapJoinHashMapResult hashMapResult = nonMatchedIterator.getNonMatchedHashMapResult(); + + ByteSegmentRef byteSegmentRef = hashMapResult.first(); + while (byteSegmentRef != null) { + + // NOTE: Big Table result columns were marked repeating NULL already. + + if (smallTableKeyOuterVectorDeserializeRow != null) { + doSmallTableKeyDeserializeRow(overflowBatch, overflowBatch.size, + keyBytes, keyOffset, keyLength); + } + + if (smallTableValueVectorDeserializeRow != null) { + + doSmallTableValueDeserializeRow(overflowBatch, overflowBatch.size, + byteSegmentRef, hashMapResult); + } + + overflowBatch.size++; + if (overflowBatch.size == overflowBatch.DEFAULT_SIZE) { + forwardOverflow(); + markBigTableColumnsAsNullRepeating(); + } + byteSegmentRef = hashMapResult.next(); + } + } + + // NOTE: We don't have to deal with FULL OUTER All-NULL key values like we do for single-column + // LONG and STRING because we do store them in the hash map... + } + + protected void generateFullOuterStringKeySmallTableNoMatches() throws HiveException { + + final BytesColumnVector singleSmallTableKeyOutputColumnVector; + if (allSmallTableKeyColumnIncluded[0]) { + singleSmallTableKeyOutputColumnVector = + (BytesColumnVector) overflowBatch.cols[allSmallTableKeyColumnNums[0]]; + } else { + singleSmallTableKeyOutputColumnVector = null; + } + + VectorMapJoinBytesHashMap hashMap = (VectorMapJoinBytesHashMap) vectorMapJoinHashTable; + + VectorMapJoinNonMatchedIterator nonMatchedIterator = + hashMap.createNonMatchedIterator(matchTracker); + nonMatchedIterator.init(); + while (nonMatchedIterator.findNextNonMatched()) { + + final byte[] keyBytes; + final int keyOffset; + final int keyLength; + boolean isKeyNull = !nonMatchedIterator.readNonMatchedBytesKey(); + if (!isKeyNull) { + keyBytes = nonMatchedIterator.getNonMatchedBytes(); + keyOffset = nonMatchedIterator.getNonMatchedBytesOffset(); + keyLength = nonMatchedIterator.getNonMatchedBytesLength(); + } else { + keyBytes = null; + keyOffset = 0; + keyLength = 0; + } + + VectorMapJoinHashMapResult hashMapResult = nonMatchedIterator.getNonMatchedHashMapResult(); + + ByteSegmentRef byteSegmentRef = hashMapResult.first(); + while (byteSegmentRef != null) { + + // NOTE: Big Table result columns were marked repeating NULL already. 
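+        // Write the non-matched Small Table key value, or NULL for the NULL-key group,
+        // into the single String key output column.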
+ + if (singleSmallTableKeyOutputColumnVector != null) { + if (isKeyNull) { + singleSmallTableKeyOutputColumnVector.isNull[overflowBatch.size] = true; + singleSmallTableKeyOutputColumnVector.noNulls = false; + } else { + singleSmallTableKeyOutputColumnVector.setVal( + overflowBatch.size, + keyBytes, keyOffset, keyLength); + singleSmallTableKeyOutputColumnVector.isNull[overflowBatch.size] = false; + } + } + + if (smallTableValueVectorDeserializeRow != null) { + + doSmallTableValueDeserializeRow(overflowBatch, overflowBatch.size, + byteSegmentRef, hashMapResult); + } + + overflowBatch.size++; + if (overflowBatch.size == overflowBatch.DEFAULT_SIZE) { + forwardOverflow(); + markBigTableColumnsAsNullRepeating(); + } + byteSegmentRef = hashMapResult.next(); + } + } + } + + protected void fullOuterHashTableSetup() { + + // Always track key matches for FULL OUTER. + matchTracker = vectorMapJoinHashTable.createMatchTracker(); + + if (!conf.isDynamicPartitionHashJoin()) { + + // When the Small Table is shared among all Reducers, FULL OUTER MapJoin we must forward + // matched keys to Intersect. + isFullOuterForwardKeysToIntersect = true; + fullOuterForwardKeys = new int[VectorizedRowBatch.DEFAULT_SIZE]; + } + } + + protected void fullOuterIntersectHashTableSetup() { + + matchTracker = vectorMapJoinHashTable.createMatchTracker(); + + fullOuterIntersectReadPos = new WriteBuffers.Position(); + } + + protected void forwardFullOuterKeysToInterset(VectorizedRowBatch batch, + int fullOuterForwardKeyCount) throws HiveException { + + // Save original projection. + int[] originalProjections = batch.projectedColumns; + int originalProjectionSize = batch.projectionSize; + + // Save selected. + int[] originalSelected = batch.selected; + boolean originalSelectedInUse = batch.selectedInUse; + int originalSize = batch.size; + + // Project with the output of our operator. + batch.projectionSize = outputProjection.length; + batch.projectedColumns = outputProjection; + + // Forward just the rows whose key had a first-time match. + batch.selected = fullOuterForwardKeys; + batch.selectedInUse = true; + batch.size = fullOuterForwardKeyCount; + + vectorForwardAuxiliary(batch); + + // Revert the projected columns back, because batch can be re-used by our parent operators. + batch.projectionSize = originalProjectionSize; + batch.projectedColumns = originalProjections; + + batch.selected = originalSelected; + batch.selectedInUse = originalSelectedInUse; + batch.size = originalSize; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java index be05cc2..883c6e0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterLongOperator.java @@ -32,7 +32,6 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; - import org.apache.hadoop.hive.ql.plan.VectorDesc; // Single-Column Long hash table import. import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinLongHashMap; @@ -65,7 +64,7 @@ protected String getLoggingPrefix() { //--------------------------------------------------------------------------- // The hash map for this specialized class. 
- private transient VectorMapJoinLongHashMap hashMap; + protected transient VectorMapJoinLongHashMap hashMap; //--------------------------------------------------------------------------- // Single-Column Long specific members. @@ -77,7 +76,7 @@ protected String getLoggingPrefix() { private transient long max; // The column number for this one column join specialization. - private transient int singleJoinColumn; + protected transient int singleJoinColumn; //--------------------------------------------------------------------------- // Pass-thru constructors. @@ -102,55 +101,41 @@ public VectorMapJoinOuterLongOperator(CompilationOpContext ctx, OperatorDesc con // @Override - public void process(Object row, int tag) throws HiveException { - - try { - VectorizedRowBatch batch = (VectorizedRowBatch) row; + protected void commonSetup() throws HiveException { + super.commonSetup(); - alias = (byte) tag; + /* + * Initialize Single-Column Long members for this specialized class. + */ - if (needCommonSetup) { - // Our one time process method initialization. - commonSetup(batch); - - /* - * Initialize Single-Column Long members for this specialized class. - */ - - singleJoinColumn = bigTableKeyColumnMap[0]; - - needCommonSetup = false; - } - - if (needHashTableSetup) { - // Setup our hash table specialization. It will be the first time the process - // method is called, or after a Hybrid Grace reload. + singleJoinColumn = bigTableKeyColumnMap[0]; + } - /* - * Get our Single-Column Long hash map information for this specialized class. - */ + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); + + /* + * Get our Single-Column Long hash map information for this specialized class. + */ + + hashMap = (VectorMapJoinLongHashMap) vectorMapJoinHashTable; + useMinMax = hashMap.useMinMax(); + if (useMinMax) { + min = hashMap.min(); + max = hashMap.max(); + } - hashMap = (VectorMapJoinLongHashMap) vectorMapJoinHashTable; - useMinMax = hashMap.useMinMax(); - if (useMinMax) { - min = hashMap.min(); - max = hashMap.max(); - } + isFullOuterForwardKeysToIntersect = false; + } - needHashTableSetup = false; - } + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { - batchCounter++; + try { final int inputLogicalSize = batch.size; - if (inputLogicalSize == 0) { - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); - } - return; - } - // Do the per-batch setup for an outer join. outerPerBatchSetup(batch); @@ -160,9 +145,6 @@ public void process(Object row, int tag) throws HiveException { // later. 
boolean inputSelectedInUse = batch.selectedInUse; if (inputSelectedInUse) { - // if (!verifyMonotonicallyIncreasing(batch.selected, batch.size)) { - // throw new HiveException("batch.selected is not in sort order and unique"); - // } System.arraycopy(batch.selected, 0, inputSelected, 0, inputLogicalSize); } @@ -174,19 +156,6 @@ public void process(Object row, int tag) throws HiveException { ve.evaluate(batch); } someRowsFilteredOut = (batch.size != inputLogicalSize); - if (LOG.isDebugEnabled()) { - if (batch.selectedInUse) { - if (inputSelectedInUse) { - LOG.debug(CLASS_NAME + - " inputSelected " + intArrayToRangesString(inputSelected, inputLogicalSize) + - " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size)); - } else { - LOG.debug(CLASS_NAME + - " inputLogicalSize " + inputLogicalSize + - " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size)); - } - } - } } // Perform any key expressions. Results will go into scratch columns. @@ -205,6 +174,11 @@ public void process(Object row, int tag) throws HiveException { long[] vector = joinColVector.vector; /* + * For FULL OUTER only. + */ + int fullOuterForwardKeyCount = 0; + + /* * Single-Column Long check for repeating. */ @@ -234,12 +208,16 @@ public void process(Object row, int tag) throws HiveException { } else { // Handle *repeated* join key, if found. long key = vector[0]; - // LOG.debug(CLASS_NAME + " repeated key " + key); if (useMinMax && (key < min || key > max)) { // Out of range for whole batch. joinResult = JoinUtil.JoinResult.NOMATCH; } else { - joinResult = hashMap.lookup(key, hashMapResults[0]); + joinResult = hashMap.lookup(key, hashMapResults[0], matchTracker); + if (isFullOuterForwardKeysToIntersect && + joinResult == JoinUtil.JoinResult.MATCH && + matchTracker.getIsFirstMatch()) { + fullOuterForwardKeys[fullOuterForwardKeyCount++] = 0; // Index of repeated keys. + } } } @@ -247,9 +225,6 @@ public void process(Object row, int tag) throws HiveException { * Common repeated join result processing. */ - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name()); - } finishOuterRepeated(batch, joinResult, hashMapResults[0], someRowsFilteredOut, inputSelectedInUse, inputLogicalSize); } else { @@ -258,10 +233,6 @@ public void process(Object row, int tag) throws HiveException { * NOT Repeating. */ - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " non-repeated"); - } - int selected[] = batch.selected; boolean selectedInUse = batch.selectedInUse; @@ -286,8 +257,6 @@ public void process(Object row, int tag) throws HiveException { for (int logical = 0; logical < batch.size; logical++) { int batchIndex = (selectedInUse ? selected[logical] : logical); - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, taskName + ", " + getOperatorId() + " candidate " + CLASS_NAME + " batch"); - /* * Single-Column Long outer null detection. */ @@ -305,7 +274,6 @@ public void process(Object row, int tag) throws HiveException { atLeastOneNonMatch = true; - // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " NULL"); } else { /* @@ -354,23 +322,25 @@ public void process(Object row, int tag) throws HiveException { // Key out of range for whole hash table. 
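+                  /*
+                   * NOTE: min/max pruning (sketch): an out-of-range key provably has
+                   * no entry, so we can record NOMATCH without probing the hash map;
+                   * for FULL OUTER no matchTracker update is needed either, since a
+                   * key that is never probed can never be a first-time match.
+                   */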
saveJoinResult = JoinUtil.JoinResult.NOMATCH; } else { - saveJoinResult = hashMap.lookup(currentKey, hashMapResults[hashMapResultCount]); + saveJoinResult = hashMap.lookup(currentKey, hashMapResults[hashMapResultCount], + matchTracker); } - // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " New Key " + currentKey + " " + saveJoinResult.name()); - /* * Common outer join result processing. */ switch (saveJoinResult) { case MATCH: + if (isFullOuterForwardKeysToIntersect && + matchTracker.getIsFirstMatch()) { + fullOuterForwardKeys[fullOuterForwardKeyCount++] = batchIndex; + } equalKeySeriesHashMapResultIndices[equalKeySeriesCount] = hashMapResultCount; equalKeySeriesAllMatchIndices[equalKeySeriesCount] = allMatchCount; equalKeySeriesIsSingleValue[equalKeySeriesCount] = hashMapResults[hashMapResultCount].isSingleRow(); equalKeySeriesDuplicateCounts[equalKeySeriesCount] = 1; allMatchs[allMatchCount++] = batchIndex; - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH isSingleValue " + equalKeySeriesIsSingleValue[equalKeySeriesCount] + " currentKey " + currentKey); break; case SPILL: @@ -381,11 +351,9 @@ public void process(Object row, int tag) throws HiveException { case NOMATCH: atLeastOneNonMatch = true; - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH" + " currentKey " + currentKey); break; } } else { - // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " Key Continues " + saveKey + " " + saveJoinResult.name()); // Series of equal keys. @@ -393,7 +361,6 @@ public void process(Object row, int tag) throws HiveException { case MATCH: equalKeySeriesDuplicateCounts[equalKeySeriesCount]++; allMatchs[allMatchCount++] = batchIndex; - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH duplicate"); break; case SPILL: @@ -403,13 +370,9 @@ public void process(Object row, int tag) throws HiveException { break; case NOMATCH: - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate"); break; } } - // if (!verifyMonotonicallyIncreasing(allMatchs, allMatchCount)) { - // throw new HiveException("allMatchs is not in sort order and unique"); - // } } } @@ -451,9 +414,19 @@ public void process(Object row, int tag) throws HiveException { } if (batch.size > 0) { - // Forward any remaining selected rows. + + // Forward any rows in the Big Table batch that had results added (they will be selected). + // NOTE: Other result rows may have been generated in the overflowBatch. forwardBigTableBatch(batch); } + if (isFullOuterForwardKeysToIntersect && + fullOuterForwardKeyCount > 0) { + + // Forward any keys in the Big Table batch that were a first-time match. + // NOTE: We cannot have a LIMIT running below FULL OUTER that cause setDone because it will + // disrupt Intersect processing... 
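+        /*
+         * NOTE: Rough shape of forwardFullOuterKeysToInterset (defined in the common
+         * base class above); sketch only:
+         *
+         *   save batch.projectedColumns / batch.selected / batch.size;
+         *   project to outputProjection;
+         *   batch.selected = fullOuterForwardKeys;
+         *   batch.size = fullOuterForwardKeyCount;
+         *   vectorForwardAuxiliary(batch);    // first-time matched keys only
+         *   restore the original projection and selection.
+         */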
+ forwardFullOuterKeysToInterset(batch, fullOuterForwardKeyCount); + } } catch (IOException e) { throw new HiveException(e); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java index 70f88e3..3050333 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterMultiKeyOperator.java @@ -31,7 +31,6 @@ import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; - import org.apache.hadoop.hive.ql.plan.VectorDesc; // Multi-Key hash table import. import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMap; @@ -69,17 +68,17 @@ protected String getLoggingPrefix() { //--------------------------------------------------------------------------- // The hash map for this specialized class. - private transient VectorMapJoinBytesHashMap hashMap; + protected transient VectorMapJoinBytesHashMap hashMap; //--------------------------------------------------------------------------- // Multi-Key specific members. // // Object that can take a set of columns in row in a vectorized row batch and serialized it. - private transient VectorSerializeRow keyVectorSerializeWrite; + protected transient VectorSerializeRow keyVectorSerializeWrite; // The BinarySortable serialization of the current key. - private transient Output currentKeyOutput; + protected transient Output currentKeyOutput; // The BinarySortable serialization of the saved key for a possible series of equal keys. private transient Output saveKeyOutput; @@ -107,55 +106,41 @@ public VectorMapJoinOuterMultiKeyOperator(CompilationOpContext ctx, OperatorDesc // @Override - public void process(Object row, int tag) throws HiveException { - - try { - VectorizedRowBatch batch = (VectorizedRowBatch) row; - - alias = (byte) tag; - - if (needCommonSetup) { - // Our one time process method initialization. - commonSetup(batch); + protected void commonSetup() throws HiveException { + super.commonSetup(); - /* - * Initialize Multi-Key members for this specialized class. - */ + /* + * Initialize Multi-Key members for this specialized class. + */ - keyVectorSerializeWrite = new VectorSerializeRow( - new BinarySortableSerializeWrite(bigTableKeyColumnMap.length)); - keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap); + keyVectorSerializeWrite = new VectorSerializeRow( + new BinarySortableSerializeWrite(bigTableKeyColumnMap.length)); + keyVectorSerializeWrite.init(bigTableKeyTypeInfos, bigTableKeyColumnMap); - currentKeyOutput = new Output(); - saveKeyOutput = new Output(); + currentKeyOutput = new Output(); + saveKeyOutput = new Output(); + } - needCommonSetup = false; - } + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); - if (needHashTableSetup) { - // Setup our hash table specialization. It will be the first time the process - // method is called, or after a Hybrid Grace reload. + /* + * Get our Multi-Key hash map information for this specialized class. + */ - /* - * Get our Multi-Key hash map information for this specialized class. 
- */ + hashMap = (VectorMapJoinBytesHashMap) vectorMapJoinHashTable; - hashMap = (VectorMapJoinBytesHashMap) vectorMapJoinHashTable; + isFullOuterForwardKeysToIntersect = false; + } - needHashTableSetup = false; - } + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { - batchCounter++; + try { final int inputLogicalSize = batch.size; - if (inputLogicalSize == 0) { - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); - } - return; - } - // Do the per-batch setup for an outer join. outerPerBatchSetup(batch); @@ -165,9 +150,6 @@ public void process(Object row, int tag) throws HiveException { // later. boolean inputSelectedInUse = batch.selectedInUse; if (inputSelectedInUse) { - // if (!verifyMonotonicallyIncreasing(batch.selected, batch.size)) { - // throw new HiveException("batch.selected is not in sort order and unique"); - // } System.arraycopy(batch.selected, 0, inputSelected, 0, inputLogicalSize); } @@ -179,19 +161,6 @@ public void process(Object row, int tag) throws HiveException { ve.evaluate(batch); } someRowsFilteredOut = (batch.size != inputLogicalSize); - if (LOG.isDebugEnabled()) { - if (batch.selectedInUse) { - if (inputSelectedInUse) { - LOG.debug(CLASS_NAME + - " inputSelected " + intArrayToRangesString(inputSelected, inputLogicalSize) + - " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size)); - } else { - LOG.debug(CLASS_NAME + - " inputLogicalSize " + inputLogicalSize + - " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size)); - } - } - } } // Perform any key expressions. Results will go into scratch columns. @@ -208,6 +177,11 @@ public void process(Object row, int tag) throws HiveException { // None. /* + * For FULL OUTER only. + */ + int fullOuterForwardKeyCount = 0; + + /* * Multi-Key Long check for repeating. */ @@ -259,16 +233,18 @@ public void process(Object row, int tag) throws HiveException { keyVectorSerializeWrite.serializeWrite(batch, 0); byte[] keyBytes = currentKeyOutput.getData(); int keyLength = currentKeyOutput.getLength(); - joinResult = hashMap.lookup(keyBytes, 0, keyLength, hashMapResults[0]); + joinResult = hashMap.lookup(keyBytes, 0, keyLength, hashMapResults[0], matchTracker); + if (isFullOuterForwardKeysToIntersect && + joinResult == JoinUtil.JoinResult.MATCH && + matchTracker.getIsFirstMatch()) { + fullOuterForwardKeys[fullOuterForwardKeyCount++] = 0; // Index of repeated keys. + } } /* * Common repeated join result processing. */ - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " repeated joinResult " + joinResult.name()); - } finishOuterRepeated(batch, joinResult, hashMapResults[0], someRowsFilteredOut, inputSelectedInUse, inputLogicalSize); } else { @@ -277,10 +253,6 @@ public void process(Object row, int tag) throws HiveException { * NOT Repeating. */ - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " non-repeated"); - } - int selected[] = batch.selected; boolean selectedInUse = batch.selectedInUse; @@ -305,8 +277,6 @@ public void process(Object row, int tag) throws HiveException { for (int logical = 0; logical < batch.size; logical++) { int batchIndex = (selectedInUse ? selected[logical] : logical); - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, taskName + ", " + getOperatorId() + " candidate " + CLASS_NAME + " batch"); - /* * Multi-Key outer null detection. 
*/ @@ -325,7 +295,6 @@ public void process(Object row, int tag) throws HiveException { atLeastOneNonMatch = true; - // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " NULL"); } else { /* @@ -375,7 +344,9 @@ public void process(Object row, int tag) throws HiveException { byte[] keyBytes = saveKeyOutput.getData(); int keyLength = saveKeyOutput.getLength(); - saveJoinResult = hashMap.lookup(keyBytes, 0, keyLength, hashMapResults[hashMapResultCount]); + saveJoinResult = hashMap.lookup(keyBytes, 0, keyLength, + hashMapResults[hashMapResultCount], matchTracker); + /* * Common outer join result processing. @@ -383,12 +354,15 @@ public void process(Object row, int tag) throws HiveException { switch (saveJoinResult) { case MATCH: + if (isFullOuterForwardKeysToIntersect && + matchTracker.getIsFirstMatch()) { + fullOuterForwardKeys[fullOuterForwardKeyCount++] = batchIndex; + } equalKeySeriesHashMapResultIndices[equalKeySeriesCount] = hashMapResultCount; equalKeySeriesAllMatchIndices[equalKeySeriesCount] = allMatchCount; equalKeySeriesIsSingleValue[equalKeySeriesCount] = hashMapResults[hashMapResultCount].isSingleRow(); equalKeySeriesDuplicateCounts[equalKeySeriesCount] = 1; allMatchs[allMatchCount++] = batchIndex; - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH isSingleValue " + equalKeySeriesIsSingleValue[equalKeySeriesCount] + " currentKey " + currentKey); break; case SPILL: @@ -399,11 +373,9 @@ public void process(Object row, int tag) throws HiveException { case NOMATCH: atLeastOneNonMatch = true; - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH" + " currentKey " + currentKey); break; } } else { - // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " Key Continues " + saveKey + " " + saveJoinResult.name()); // Series of equal keys. @@ -411,7 +383,6 @@ public void process(Object row, int tag) throws HiveException { case MATCH: equalKeySeriesDuplicateCounts[equalKeySeriesCount]++; allMatchs[allMatchCount++] = batchIndex; - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH duplicate"); break; case SPILL: @@ -421,13 +392,9 @@ public void process(Object row, int tag) throws HiveException { break; case NOMATCH: - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate"); break; } } - // if (!verifyMonotonicallyIncreasing(allMatchs, allMatchCount)) { - // throw new HiveException("allMatchs is not in sort order and unique"); - // } } } @@ -469,9 +436,19 @@ public void process(Object row, int tag) throws HiveException { } if (batch.size > 0) { - // Forward any remaining selected rows. + + // Forward any rows in the Big Table batch that had results added (they will be selected). + // NOTE: Other result rows may have been generated in the overflowBatch. forwardBigTableBatch(batch); } + if (isFullOuterForwardKeysToIntersect && + fullOuterForwardKeyCount > 0) { + + // Forward any keys in the Big Table batch that were a first-time match. + // NOTE: We cannot have a LIMIT running below FULL OUTER that cause setDone because it will + // disrupt Intersect processing... 
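+        /*
+         * NOTE: fullOuterForwardKeys holds the batch indices whose lookup was that
+         * key's first MATCH (matchTracker.getIsFirstMatch()), so each distinct key
+         * reaches the Intersect stage once per hash table rather than once per probe.
+         */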
+ forwardFullOuterKeysToInterset(batch, fullOuterForwardKeyCount); + } } catch (IOException e) { throw new HiveException(e); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java index 714f5ec..6b41776 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/VectorMapJoinOuterStringOperator.java @@ -26,11 +26,11 @@ import org.apache.hadoop.hive.ql.CompilationOpContext; import org.apache.hadoop.hive.ql.exec.JoinUtil; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.OperatorDesc; - import org.apache.hadoop.hive.ql.plan.VectorDesc; // Single-Column String hash table import. import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMap; @@ -65,14 +65,14 @@ protected String getLoggingPrefix() { //--------------------------------------------------------------------------- // The hash map for this specialized class. - private transient VectorMapJoinBytesHashMap hashMap; + protected transient VectorMapJoinBytesHashMap hashMap; //--------------------------------------------------------------------------- // Single-Column String specific members. // // The column number for this one column join specialization. - private transient int singleJoinColumn; + protected transient int singleJoinColumn; //--------------------------------------------------------------------------- // Pass-thru constructors. @@ -97,50 +97,36 @@ public VectorMapJoinOuterStringOperator(CompilationOpContext ctx, OperatorDesc c // @Override - public void process(Object row, int tag) throws HiveException { - - try { - VectorizedRowBatch batch = (VectorizedRowBatch) row; + protected void commonSetup() throws HiveException { + super.commonSetup(); - alias = (byte) tag; + /* + * Initialize Single-Column String members for this specialized class. + */ - if (needCommonSetup) { - // Our one time process method initialization. - commonSetup(batch); - - /* - * Initialize Single-Column String members for this specialized class. - */ + singleJoinColumn = bigTableKeyColumnMap[0]; + } - singleJoinColumn = bigTableKeyColumnMap[0]; + @Override + public void hashTableSetup() throws HiveException { + super.hashTableSetup(); - needCommonSetup = false; - } + /* + * Get our Single-Column String hash map information for this specialized class. + */ - if (needHashTableSetup) { - // Setup our hash table specialization. It will be the first time the process - // method is called, or after a Hybrid Grace reload. + hashMap = (VectorMapJoinBytesHashMap) vectorMapJoinHashTable; - /* - * Get our Single-Column String hash map information for this specialized class. 
- */ - - hashMap = (VectorMapJoinBytesHashMap) vectorMapJoinHashTable; + isFullOuterForwardKeysToIntersect = false; + } - needHashTableSetup = false; - } + @Override + public void processBatch(VectorizedRowBatch batch) throws HiveException { - batchCounter++; + try { final int inputLogicalSize = batch.size; - if (inputLogicalSize == 0) { - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " empty"); - } - return; - } - // Do the per-batch setup for an outer join. outerPerBatchSetup(batch); @@ -150,33 +136,17 @@ public void process(Object row, int tag) throws HiveException { // later. boolean inputSelectedInUse = batch.selectedInUse; if (inputSelectedInUse) { - // if (!verifyMonotonicallyIncreasing(batch.selected, batch.size)) { - // throw new HiveException("batch.selected is not in sort order and unique"); - // } System.arraycopy(batch.selected, 0, inputSelected, 0, inputLogicalSize); } // Filtering for outer join just removes rows available for hash table matching. - boolean someRowsFilteredOut = false; + boolean someRowsFilteredOut = false; if (bigTableFilterExpressions.length > 0) { // Since the input for (VectorExpression ve : bigTableFilterExpressions) { ve.evaluate(batch); } someRowsFilteredOut = (batch.size != inputLogicalSize); - if (LOG.isDebugEnabled()) { - if (batch.selectedInUse) { - if (inputSelectedInUse) { - LOG.debug(CLASS_NAME + - " inputSelected " + intArrayToRangesString(inputSelected, inputLogicalSize) + - " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size)); - } else { - LOG.debug(CLASS_NAME + - " inputLogicalSize " + inputLogicalSize + - " filtered batch.selected " + intArrayToRangesString(batch.selected, batch.size)); - } - } - } } // Perform any key expressions. Results will go into scratch columns. @@ -197,6 +167,11 @@ public void process(Object row, int tag) throws HiveException { int[] length = joinColVector.length; /* + * For FULL OUTER only. + */ + int fullOuterForwardKeyCount = 0; + + /* * Single-Column String check for repeating. */ @@ -228,7 +203,13 @@ public void process(Object row, int tag) throws HiveException { byte[] keyBytes = vector[0]; int keyStart = start[0]; int keyLength = length[0]; - joinResult = hashMap.lookup(keyBytes, keyStart, keyLength, hashMapResults[0]); + joinResult = hashMap.lookup( + keyBytes, keyStart, keyLength, hashMapResults[0], matchTracker); + if (isFullOuterForwardKeysToIntersect && + joinResult == JoinUtil.JoinResult.MATCH && + matchTracker.getIsFirstMatch()) { + fullOuterForwardKeys[fullOuterForwardKeyCount++] = 0; // Index of repeated keys. + } } /* @@ -246,10 +227,6 @@ public void process(Object row, int tag) throws HiveException { * NOT Repeating. */ - if (LOG.isDebugEnabled()) { - LOG.debug(CLASS_NAME + " batch #" + batchCounter + " non-repeated"); - } - int selected[] = batch.selected; boolean selectedInUse = batch.selectedInUse; @@ -274,8 +251,6 @@ public void process(Object row, int tag) throws HiveException { for (int logical = 0; logical < batch.size; logical++) { int batchIndex = (selectedInUse ? selected[logical] : logical); - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, taskName + ", " + getOperatorId() + " candidate " + CLASS_NAME + " batch"); - /* * Single-Column String outer null detection. 
*/ @@ -293,7 +268,6 @@ public void process(Object row, int tag) throws HiveException { atLeastOneNonMatch = true; - // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " NULL"); } else { /* @@ -343,7 +317,8 @@ public void process(Object row, int tag) throws HiveException { byte[] keyBytes = vector[batchIndex]; int keyStart = start[batchIndex]; int keyLength = length[batchIndex]; - saveJoinResult = hashMap.lookup(keyBytes, keyStart, keyLength, hashMapResults[hashMapResultCount]); + saveJoinResult = hashMap.lookup(keyBytes, keyStart, keyLength, + hashMapResults[hashMapResultCount], matchTracker); /* * Common outer join result processing. @@ -351,12 +326,15 @@ public void process(Object row, int tag) throws HiveException { switch (saveJoinResult) { case MATCH: + if (isFullOuterForwardKeysToIntersect && + matchTracker.getIsFirstMatch()) { + fullOuterForwardKeys[fullOuterForwardKeyCount++] = batchIndex; + } equalKeySeriesHashMapResultIndices[equalKeySeriesCount] = hashMapResultCount; equalKeySeriesAllMatchIndices[equalKeySeriesCount] = allMatchCount; equalKeySeriesIsSingleValue[equalKeySeriesCount] = hashMapResults[hashMapResultCount].isSingleRow(); equalKeySeriesDuplicateCounts[equalKeySeriesCount] = 1; allMatchs[allMatchCount++] = batchIndex; - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH isSingleValue " + equalKeySeriesIsSingleValue[equalKeySeriesCount] + " currentKey " + currentKey); break; case SPILL: @@ -367,11 +345,9 @@ public void process(Object row, int tag) throws HiveException { case NOMATCH: atLeastOneNonMatch = true; - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH" + " currentKey " + currentKey); break; } } else { - // LOG.debug(CLASS_NAME + " logical " + logical + " batchIndex " + batchIndex + " Key Continues " + saveKey + " " + saveJoinResult.name()); // Series of equal keys. @@ -379,7 +355,6 @@ public void process(Object row, int tag) throws HiveException { case MATCH: equalKeySeriesDuplicateCounts[equalKeySeriesCount]++; allMatchs[allMatchCount++] = batchIndex; - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " MATCH duplicate"); break; case SPILL: @@ -389,13 +364,9 @@ public void process(Object row, int tag) throws HiveException { break; case NOMATCH: - // VectorizedBatchUtil.debugDisplayOneRow(batch, batchIndex, CLASS_NAME + " NOMATCH duplicate"); break; } } - // if (!verifyMonotonicallyIncreasing(allMatchs, allMatchCount)) { - // throw new HiveException("allMatchs is not in sort order and unique"); - // } } } @@ -437,9 +408,19 @@ public void process(Object row, int tag) throws HiveException { } if (batch.size > 0) { - // Forward any remaining selected rows. + + // Forward any rows in the Big Table batch that had results added (they will be selected). + // NOTE: Other result rows may have been generated in the overflowBatch. forwardBigTableBatch(batch); } + if (isFullOuterForwardKeysToIntersect && + fullOuterForwardKeyCount > 0) { + + // Forward any keys in the Big Table batch that were a first-time match. + // NOTE: We cannot have a LIMIT running below FULL OUTER that cause setDone because it will + // disrupt Intersect processing... 
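+        /*
+         * NOTE: same first-match key forwarding as the Long and Multi-Key variants;
+         * see forwardFullOuterKeysToInterset for the projection/selected swap.
+         */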
+ forwardFullOuterKeysToInterset(batch, fullOuterForwardKeyCount); + } } catch (IOException e) { throw new HiveException(e); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java index 57db136..22f4d17 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMap.java @@ -23,9 +23,13 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMap; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.WriteBuffers; +import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; import org.apache.hadoop.io.BytesWritable; import org.apache.hive.common.util.HashCodeUtil; @@ -46,12 +50,115 @@ protected BytesWritable testValueBytesWritable; + private long fullOuterNullKeyValueRef; + + private static class NonMatchedBytesHashMapIterator extends VectorMapJoinFastNonMatchedIterator { + + private VectorMapJoinFastBytesHashMap hashMap; + + private boolean noMore; + private boolean keyIsNull; + + private WriteBuffers.Position nonMatchedReadPos; + + private ByteSegmentRef nonMatchedKeyByteSegmentRef; + + private VectorMapJoinFastValueStore.HashMapResult nonMatchedHashMapResult; + + NonMatchedBytesHashMapIterator(MatchTracker matchTracker, + VectorMapJoinFastBytesHashMap hashMap) { + super(matchTracker); + this.hashMap = hashMap; + } + + @Override + public void init() { + super.init(); + noMore = false; + keyIsNull = false; + nonMatchedReadPos = new WriteBuffers.Position(); + nonMatchedKeyByteSegmentRef = new ByteSegmentRef(); + nonMatchedHashMapResult = new VectorMapJoinFastValueStore.HashMapResult(); + } + + @Override + public boolean findNextNonMatched() { + if (noMore) { + return false; + } + while (true) { + nonMatchedLogicalSlotNum++; + if (nonMatchedLogicalSlotNum >= hashMap.logicalHashBucketCount) { + + // Fall below and handle Small Table NULL key. + break; + } + final int nonMatchedTripleIndex = nonMatchedLogicalSlotNum * 3; + if (hashMap.slotTriples[nonMatchedTripleIndex] != 0) { + if (!matchTracker.wasMatched(nonMatchedLogicalSlotNum)) { + nonMatchedHashMapResult.set( + hashMap.valueStore, hashMap.slotTriples[nonMatchedTripleIndex + 2]); + keyIsNull = false; + return true; + } + } + } + + // Do we have a Small Table NULL Key? 
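+      /*
+       * NOTE: values for a NULL single-column key are kept outside the slot array in
+       * fullOuterNullKeyValueRef, so once the slot sweep passes logicalHashBucketCount
+       * we emit them exactly once as a final keyIsNull entry (readNonMatchedBytesKey()
+       * will then return false).
+       */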
+ if (hashMap.fullOuterNullKeyValueRef == 0) { + return false; + } + nonMatchedHashMapResult.set( + hashMap.valueStore, hashMap.fullOuterNullKeyValueRef); + noMore = true; + keyIsNull = true; + return true; + } + + @Override + public boolean readNonMatchedBytesKey() throws HiveException { + if (keyIsNull) { + return false; + } + hashMap.keyStore.getKey( + hashMap.slotTriples[nonMatchedLogicalSlotNum * 3], + nonMatchedKeyByteSegmentRef, + nonMatchedReadPos); + return true; + } + + @Override + public byte[] getNonMatchedBytes() { + return nonMatchedKeyByteSegmentRef.getBytes(); + } + + @Override + public int getNonMatchedBytesOffset() { + return (int) nonMatchedKeyByteSegmentRef.getOffset(); + } + + @Override + public int getNonMatchedBytesLength() { + return nonMatchedKeyByteSegmentRef.getLength(); + } + + @Override + public VectorMapJoinHashMapResult getNonMatchedHashMapResult() { + return nonMatchedHashMapResult; + } + } + @Override public VectorMapJoinHashMapResult createHashMapResult() { return new VectorMapJoinFastValueStore.HashMapResult(); } @Override + public VectorMapJoinNonMatchedIterator createNonMatchedIterator(MatchTracker matchTracker) { + return new NonMatchedBytesHashMapIterator(matchTracker, this); + } + + @Override public void assignSlot(int slot, byte[] keyBytes, int keyStart, int keyLength, long hashCode, boolean isNewKey, BytesWritable currentValue) { @@ -64,31 +171,56 @@ public void assignSlot(int slot, byte[] keyBytes, int keyStart, int keyLength, slotTriples[tripleIndex] = keyStore.add(keyBytes, keyStart, keyLength); slotTriples[tripleIndex + 1] = hashCode; slotTriples[tripleIndex + 2] = valueStore.addFirst(valueBytes, 0, valueLength); - // LOG.debug("VectorMapJoinFastBytesHashMap add first keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2])); } else { // Add another value. 
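+      /*
+       * NOTE: duplicate keys chain values inside VectorMapJoinFastValueStore:
+       * addFirst() creates the list and returns a reference word; addMore() links in
+       * another value and returns an updated reference word, which is stored back
+       * into slotTriples[tripleIndex + 2].
+       */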
- // LOG.debug("VectorMapJoinFastBytesHashMap add more keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2])); slotTriples[tripleIndex + 2] = valueStore.addMore(slotTriples[tripleIndex + 2], valueBytes, 0, valueLength); - // LOG.debug("VectorMapJoinFastBytesHashMap add more new valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2])); } } @Override - public JoinUtil.JoinResult lookup(byte[] keyBytes, int keyStart, int keyLength, VectorMapJoinHashMapResult hashMapResult) { + public JoinUtil.JoinResult lookup(byte[] keyBytes, int keyStart, int keyLength, + VectorMapJoinHashMapResult hashMapResult) { VectorMapJoinFastValueStore.HashMapResult optimizedHashMapResult = (VectorMapJoinFastValueStore.HashMapResult) hashMapResult; optimizedHashMapResult.forget(); long hashCode = HashCodeUtil.murmurHash(keyBytes, keyStart, keyLength); - long valueRefWord = findReadSlot(keyBytes, keyStart, keyLength, hashCode, hashMapResult.getReadPos()); + int tripleIndex = + findReadSlot(keyBytes, keyStart, keyLength, hashCode, hashMapResult.getReadPos()); JoinUtil.JoinResult joinResult; - if (valueRefWord == -1) { + if (tripleIndex == -1) { joinResult = JoinUtil.JoinResult.NOMATCH; } else { - // LOG.debug("VectorMapJoinFastBytesHashMap lookup hashCode " + Long.toHexString(hashCode) + " valueRefWord " + Long.toHexString(valueRefWord) + " (valueStore != null) " + (valueStore != null)); + optimizedHashMapResult.set(valueStore, slotTriples[tripleIndex + 2]); - optimizedHashMapResult.set(valueStore, valueRefWord); + joinResult = JoinUtil.JoinResult.MATCH; + } + + optimizedHashMapResult.setJoinResult(joinResult); + + return joinResult; + } + + @Override + public JoinUtil.JoinResult lookup(byte[] keyBytes, int keyStart, int keyLength, + VectorMapJoinHashMapResult hashMapResult, MatchTracker matchTracker) { + VectorMapJoinFastValueStore.HashMapResult optimizedHashMapResult = + (VectorMapJoinFastValueStore.HashMapResult) hashMapResult; + + optimizedHashMapResult.forget(); + + long hashCode = HashCodeUtil.murmurHash(keyBytes, keyStart, keyLength); + int tripleIndex = + findReadSlot(keyBytes, keyStart, keyLength, hashCode, hashMapResult.getReadPos()); + JoinUtil.JoinResult joinResult; + if (tripleIndex == -1) { + joinResult = JoinUtil.JoinResult.NOMATCH; + } else { + if (matchTracker != null) { + matchTracker.trackMatch(tripleIndex / 3); + } + optimizedHashMapResult.set(valueStore, slotTriples[tripleIndex + 2]); joinResult = JoinUtil.JoinResult.MATCH; } @@ -98,10 +230,40 @@ public void assignSlot(int slot, byte[] keyBytes, int keyStart, int keyLength, return joinResult; } + @Override + public void lookupNoResult(byte[] keyBytes, int keyStart, int keyLength, + WriteBuffers.Position readPos, MatchTracker matchTracker) { + + long hashCode = HashCodeUtil.murmurHash(keyBytes, keyStart, keyLength); + int tripleIndex = + findReadSlot(keyBytes, keyStart, keyLength, hashCode, readPos); + JoinUtil.JoinResult joinResult; + if (tripleIndex != -1) { + matchTracker.trackMatch(tripleIndex / 3); + } + } + + public void addFullOuterNullKeyValue(BytesWritable currentValue) { + + byte[] valueBytes = currentValue.getBytes(); + int valueLength = currentValue.getLength(); + + if (fullOuterNullKeyValueRef == 0) { + fullOuterNullKeyValueRef = valueStore.addFirst(valueBytes, 0, valueLength); + } else { + + // Add another value. 
+ fullOuterNullKeyValueRef = + valueStore.addMore(fullOuterNullKeyValueRef, valueBytes, 0, valueLength); + } + } + public VectorMapJoinFastBytesHashMap( int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { super(initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); + fullOuterNullKeyValueRef = 0; + valueStore = new VectorMapJoinFastValueStore(writeBuffersSize); // Share the same write buffers with our value store. diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java index 726fd29..c0295dd 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashMultiSet.java @@ -57,10 +57,8 @@ public void assignSlot(int slot, byte[] keyBytes, int keyStart, int keyLength, slotTriples[tripleIndex] = keyStore.add(keyBytes, keyStart, keyLength); slotTriples[tripleIndex + 1] = hashCode; slotTriples[tripleIndex + 2] = 1; // Count. - // LOG.debug("VectorMapJoinFastBytesHashMap add first keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2])); } else { // Add another value. - // LOG.debug("VectorMapJoinFastBytesHashMap add more keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2])); slotTriples[tripleIndex + 2]++; } } @@ -75,13 +73,20 @@ public void assignSlot(int slot, byte[] keyBytes, int keyStart, int keyLength, optimizedHashMultiSetResult.forget(); long hashCode = HashCodeUtil.murmurHash(keyBytes, keyStart, keyLength); - long count = findReadSlot(keyBytes, keyStart, keyLength, hashCode, hashMultiSetResult.getReadPos()); + int tripleIndex = + findReadSlot(keyBytes, keyStart, keyLength, hashCode, hashMultiSetResult.getReadPos()); JoinUtil.JoinResult joinResult; - if (count == -1) { + if (tripleIndex == -1) { joinResult = JoinUtil.JoinResult.NOMATCH; } else { - - optimizedHashMultiSetResult.set(count); + /* + * NOTE: Support for trackMatched not needed yet for Set. 
+ + if (matchTracker != null) { + matchTracker.trackMatch(tripleIndex / 3); + } + */ + optimizedHashMultiSetResult.set(slotTriples[tripleIndex + 2]); joinResult = JoinUtil.JoinResult.MATCH; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java index 5d750a8..e99a029 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashSet.java @@ -65,11 +65,19 @@ public void assignSlot(int slot, byte[] keyBytes, int keyStart, int keyLength, optimizedHashSetResult.forget(); long hashCode = HashCodeUtil.murmurHash(keyBytes, keyStart, keyLength); - long existance = findReadSlot(keyBytes, keyStart, keyLength, hashCode, hashSetResult.getReadPos()); + int tripleIndex = + findReadSlot(keyBytes, keyStart, keyLength, hashCode, hashSetResult.getReadPos()); JoinUtil.JoinResult joinResult; - if (existance == -1) { + if (tripleIndex == -1) { joinResult = JoinUtil.JoinResult.NOMATCH; } else { + /* + * NOTE: Support for trackMatched not needed yet for Set. + + if (matchTracker != null) { + matchTracker.trackMatch(tripleIndex / 3); + } + */ joinResult = JoinUtil.JoinResult.MATCH; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java index f2b794f..dcb89b6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastBytesHashTable.java @@ -70,13 +70,11 @@ public void add(byte[] keyBytes, int keyStart, int keyLength, BytesWritable curr while (true) { int tripleIndex = 3 * slot; if (slotTriples[tripleIndex] == 0) { - // LOG.debug("VectorMapJoinFastBytesHashMap findWriteSlot slot " + slot + " tripleIndex " + tripleIndex + " empty"); isNewKey = true;; break; } if (hashCode == slotTriples[tripleIndex + 1] && keyStore.unsafeEqualKey(slotTriples[tripleIndex], keyBytes, keyStart, keyLength)) { - // LOG.debug("VectorMapJoinFastBytesHashMap findWriteSlot slot " + slot + " tripleIndex " + tripleIndex + " existing"); isNewKey = false; break; } @@ -150,7 +148,6 @@ private void expandAndRehash() { } // Use old value reference word. 
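+      /*
+       * NOTE: rehashing moves only the 3-word slot entries; the key and value bytes
+       * stay where they are in the write buffers, so the keyRef and old value
+       * reference words are copied verbatim into the larger newSlotTriples array.
+       */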
- // LOG.debug("VectorMapJoinFastLongHashTable expandAndRehash key " + tableKey + " slot " + newSlot + " newPairIndex " + newPairIndex + " empty slot (i = " + i + ")"); newSlotTriples[newTripleIndex] = keyRef; newSlotTriples[newTripleIndex + 1] = hashCode; @@ -165,10 +162,9 @@ private void expandAndRehash() { largestNumberOfSteps = newLargestNumberOfSteps; resizeThreshold = (int)(logicalHashBucketCount * loadFactor); metricExpands++; - // LOG.debug("VectorMapJoinFastLongHashTable expandAndRehash new logicalHashBucketCount " + logicalHashBucketCount + " resizeThreshold " + resizeThreshold + " metricExpands " + metricExpands); } - protected final long findReadSlot( + protected final int findReadSlot( byte[] keyBytes, int keyStart, int keyLength, long hashCode, WriteBuffers.Position readPos) { int intHashCode = (int) hashCode; @@ -177,7 +173,6 @@ protected final long findReadSlot( int i = 0; while (true) { int tripleIndex = slot * 3; - // LOG.debug("VectorMapJoinFastBytesHashMap findReadSlot slot keyRefWord " + Long.toHexString(slotTriples[tripleIndex]) + " hashCode " + Long.toHexString(hashCode) + " entry hashCode " + Long.toHexString(slotTriples[tripleIndex + 1]) + " valueRefWord " + Long.toHexString(slotTriples[tripleIndex + 2])); if (slotTriples[tripleIndex] == 0) { // Given that we do not delete, an empty slot means no match. return -1; @@ -185,7 +180,7 @@ protected final long findReadSlot( // Finally, verify the key bytes match. if (keyStore.equalKey(slotTriples[tripleIndex], keyBytes, keyStart, keyLength, readPos)) { - return slotTriples[tripleIndex + 2]; + return tripleIndex; } } // Some other key (collision) - keep probing. diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java index cbcc9b1..a868b5f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastHashTable.java @@ -22,7 +22,9 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.mapjoin.MapJoinMemoryExhaustionError; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTable; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator; public abstract class VectorMapJoinFastHashTable implements VectorMapJoinHashTable { public static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinFastHashTable.class); @@ -96,4 +98,14 @@ public long getEstimatedMemorySize() { JavaDataModel jdm = JavaDataModel.get(); return JavaDataModel.alignUp(10L * jdm.primitive1() + jdm.primitive2(), jdm.memoryAlign()); } + + @Override + public MatchTracker createMatchTracker() { + return new MatchTracker(logicalHashBucketCount); + } + + @Override + public VectorMapJoinNonMatchedIterator createNonMatchedIterator(MatchTracker matchTracker) { + throw new RuntimeException("Not implemented"); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastKeyStore.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastKeyStore.java index b6684e0..0a3c84a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastKeyStore.java +++ 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastKeyStore.java @@ -22,6 +22,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.serde2.WriteBuffers; +import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; // Optimized for sequential key lookup. @@ -124,13 +125,11 @@ public boolean unsafeEqualKey(long keyRefWord, byte[] keyBytes, int keyStart, in public boolean equalKey(long keyRefWord, byte[] keyBytes, int keyStart, int keyLength, WriteBuffers.Position readPos) { - int storedKeyLengthLength = + int storedKeyLength = (int) ((keyRefWord & SmallKeyLength.bitMask) >> SmallKeyLength.bitShift); - boolean isKeyLengthSmall = (storedKeyLengthLength != SmallKeyLength.allBitsOn); + boolean isKeyLengthSmall = (storedKeyLength != SmallKeyLength.allBitsOn); - // LOG.debug("VectorMapJoinFastKeyStore equalKey keyLength " + keyLength + " isKeyLengthSmall " + isKeyLengthSmall + " storedKeyLengthLength " + storedKeyLengthLength + " keyRefWord " + Long.toHexString(keyRefWord)); - - if (isKeyLengthSmall && storedKeyLengthLength != keyLength) { + if (isKeyLengthSmall && storedKeyLength != keyLength) { return false; } long absoluteKeyOffset = @@ -139,16 +138,14 @@ public boolean equalKey(long keyRefWord, byte[] keyBytes, int keyStart, int keyL writeBuffers.setReadPoint(absoluteKeyOffset, readPos); if (!isKeyLengthSmall) { // Read big value length we wrote with the value. - storedKeyLengthLength = writeBuffers.readVInt(readPos); - if (storedKeyLengthLength != keyLength) { - // LOG.debug("VectorMapJoinFastKeyStore equalKey no match big length"); + storedKeyLength = writeBuffers.readVInt(readPos); + if (storedKeyLength != keyLength) { return false; } } // Our reading is positioned to the key. if (!writeBuffers.isEqual(keyBytes, keyStart, readPos, keyLength)) { - // LOG.debug("VectorMapJoinFastKeyStore equalKey no match on bytes"); return false; } @@ -174,4 +171,23 @@ public long getEstimatedMemorySize() { size += unsafeReadPos == null ? 0 : unsafeReadPos.getEstimatedMemorySize(); return size; } + + public void getKey(long keyRefWord, ByteSegmentRef keyByteSegmentRef, + WriteBuffers.Position readPos) { + + int storedKeyLength = + (int) ((keyRefWord & SmallKeyLength.bitMask) >> SmallKeyLength.bitShift); + boolean isKeyLengthSmall = (storedKeyLength != SmallKeyLength.allBitsOn); + + long absoluteKeyOffset = + (keyRefWord & AbsoluteKeyOffset.bitMask); + + writeBuffers.setReadPoint(absoluteKeyOffset, readPos); + if (!isKeyLengthSmall) { + // Read big value length we wrote with the value. 
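+      /*
+       * NOTE: keyRefWord packs a small key's length into the SmallKeyLength bit
+       * field; allBitsOn is the "too big" sentinel, in which case the actual length
+       * was written as a VInt in front of the key bytes at absoluteKeyOffset (the
+       * same layout equalKey() decodes above).
+       */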
+ storedKeyLength = writeBuffers.readVInt(readPos); + } + writeBuffers.getByteSegmentRefToCurrent(keyByteSegmentRef, storedKeyLength, readPos); + } + } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java index f42430d..385150f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMap.java @@ -22,13 +22,17 @@ import org.apache.hadoop.hive.common.MemoryEstimate; import org.apache.hadoop.hive.ql.util.JavaDataModel; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; +// import org.slf4j.Logger; +// import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinLongHashMap; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; +import org.apache.hadoop.hive.serde2.WriteBuffers; +import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; import org.apache.hadoop.io.BytesWritable; import org.apache.hive.common.util.HashCodeUtil; @@ -41,17 +45,117 @@ extends VectorMapJoinFastLongHashTable implements VectorMapJoinLongHashMap, MemoryEstimate { - public static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinFastLongHashMap.class); + // public static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinFastLongHashMap.class); + + private final boolean isSaveNullKeyValuesForFullOuter; protected VectorMapJoinFastValueStore valueStore; private BytesWritable testValueBytesWritable; + private long fullOuterNullKeyValueRef; + + private static class NonMatchedLongHashMapIterator extends VectorMapJoinFastNonMatchedIterator { + + private VectorMapJoinFastLongHashMap hashMap; + + private boolean noMore; + private boolean keyIsNull; + + private WriteBuffers.Position nonMatchedReadPos; + + private ByteSegmentRef nonMatchedKeyByteSegmentRef; + + private VectorMapJoinFastValueStore.HashMapResult nonMatchedHashMapResult; + + NonMatchedLongHashMapIterator(MatchTracker matchTracker, + VectorMapJoinFastLongHashMap hashMap) { + super(matchTracker); + this.hashMap = hashMap; + } + + @Override + public void init() { + super.init(); + noMore = false; + keyIsNull = false; + nonMatchedHashMapResult = new VectorMapJoinFastValueStore.HashMapResult(); + } + + @Override + public boolean findNextNonMatched() { + if (noMore) { + return false; + } + while (true) { + nonMatchedLogicalSlotNum++; + if (nonMatchedLogicalSlotNum >= hashMap.logicalHashBucketCount){ + + // Fall below and handle Small Table NULL key. + break; + } + final int nonMatchedDoubleIndex = nonMatchedLogicalSlotNum * 2; + if (hashMap.slotPairs[nonMatchedDoubleIndex] != 0) { + if (!matchTracker.wasMatched(nonMatchedLogicalSlotNum)) { + nonMatchedHashMapResult.set( + hashMap.valueStore, hashMap.slotPairs[nonMatchedDoubleIndex]); + keyIsNull = false; + return true; + } + } + } + + // Do we have a Small Table NULL Key? 
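+      /*
+       * NOTE: same NULL-key side store as the bytes variant. For Long keys a slot
+       * pair is (valueRef, key), so getNonMatchedLongKey() reads
+       * slotPairs[slot * 2 + 1] and is never called for the NULL entry
+       * (readNonMatchedLongKey() returns false first).
+       */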
+ if (hashMap.fullOuterNullKeyValueRef == 0) { + return false; + } + nonMatchedHashMapResult.set( + hashMap.valueStore, hashMap.fullOuterNullKeyValueRef); + noMore = true; + keyIsNull = true; + return true; + } + + @Override + public boolean readNonMatchedLongKey() { + return !keyIsNull; + } + + @Override + public long getNonMatchedLongKey() { + return hashMap.slotPairs[nonMatchedLogicalSlotNum * 2 + 1]; + } + + @Override + public VectorMapJoinHashMapResult getNonMatchedHashMapResult() { + return nonMatchedHashMapResult; + } + } + @Override public VectorMapJoinHashMapResult createHashMapResult() { return new VectorMapJoinFastValueStore.HashMapResult(); } + @Override + public VectorMapJoinNonMatchedIterator createNonMatchedIterator(MatchTracker matchTracker) { + return new NonMatchedLongHashMapIterator(matchTracker, this); + } + + @Override + public void putRow(BytesWritable currentKey, BytesWritable currentValue) + throws HiveException, IOException { + + if (!adaptPutRow(currentKey, currentValue)) { + + // Ignore NULL keys, except for FULL OUTER. + if (isSaveNullKeyValuesForFullOuter) { + addFullOuterNullKeyValue(currentValue); + } + + } + } + /* * A Unit Test convenience method for putting key and value into the hash table using the * actual types. @@ -91,13 +195,40 @@ public void assignSlot(int slot, long key, boolean isNewKey, BytesWritable curre optimizedHashMapResult.forget(); long hashCode = HashCodeUtil.calculateLongHashCode(key); - // LOG.debug("VectorMapJoinFastLongHashMap lookup " + key + " hashCode " + hashCode); - long valueRef = findReadSlot(key, hashCode); + int pairIndex = findReadSlot(key, hashCode); + JoinUtil.JoinResult joinResult; + if (pairIndex == -1) { + joinResult = JoinUtil.JoinResult.NOMATCH; + } else { + optimizedHashMapResult.set(valueStore, slotPairs[pairIndex]); + + joinResult = JoinUtil.JoinResult.MATCH; + } + + optimizedHashMapResult.setJoinResult(joinResult); + + return joinResult; + } + + @Override + public JoinUtil.JoinResult lookup(long key, VectorMapJoinHashMapResult hashMapResult, + MatchTracker matchTracker) { + + VectorMapJoinFastValueStore.HashMapResult optimizedHashMapResult = + (VectorMapJoinFastValueStore.HashMapResult) hashMapResult; + + optimizedHashMapResult.forget(); + + long hashCode = HashCodeUtil.calculateLongHashCode(key); + int pairIndex = findReadSlot(key, hashCode); JoinUtil.JoinResult joinResult; - if (valueRef == -1) { + if (pairIndex == -1) { joinResult = JoinUtil.JoinResult.NOMATCH; } else { - optimizedHashMapResult.set(valueStore, valueRef); + if (matchTracker != null) { + matchTracker.trackMatch(pairIndex / 2); + } + optimizedHashMapResult.set(valueStore, slotPairs[pairIndex]); joinResult = JoinUtil.JoinResult.MATCH; } @@ -107,12 +238,42 @@ public void assignSlot(int slot, long key, boolean isNewKey, BytesWritable curre return joinResult; } + @Override + public void lookupNoResult(long key, WriteBuffers.Position readPos, MatchTracker matchTracker) { + + long hashCode = HashCodeUtil.calculateLongHashCode(key); + int pairIndex = findReadSlot(key, hashCode); + JoinUtil.JoinResult joinResult; + if (pairIndex != -1) { + matchTracker.trackMatch(pairIndex / 2); + } + } + + public void addFullOuterNullKeyValue(BytesWritable currentValue) { + + byte[] valueBytes = currentValue.getBytes(); + int valueLength = currentValue.getLength(); + + if (fullOuterNullKeyValueRef == 0) { + fullOuterNullKeyValueRef = valueStore.addFirst(valueBytes, 0, valueLength); + } else { + + // Add another value. 
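+      /*
+       * NOTE: identical value chaining to the bytes hash map's
+       * addFullOuterNullKeyValue().
+       */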
+ fullOuterNullKeyValueRef = + valueStore.addMore(fullOuterNullKeyValueRef, valueBytes, 0, valueLength); + } + } + public VectorMapJoinFastLongHashMap( - boolean minMaxEnabled, boolean isOuterJoin, HashTableKeyType hashTableKeyType, + boolean minMaxEnabled, + boolean isSaveNullKeyValuesForFullOuter, + HashTableKeyType hashTableKeyType, int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { - super(minMaxEnabled, isOuterJoin, hashTableKeyType, + super(minMaxEnabled, hashTableKeyType, initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); + this.isSaveNullKeyValuesForFullOuter = isSaveNullKeyValuesForFullOuter; valueStore = new VectorMapJoinFastValueStore(writeBuffersSize); + fullOuterNullKeyValueRef = 0; } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java index 228fa72..eda8a56 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashMultiSet.java @@ -42,11 +42,29 @@ public static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinFastLongHashMultiSet.class); + private final boolean isSaveNullKeyValuesForFullOuter; + + private long fullOuterNullKeyValueCount; + @Override public VectorMapJoinHashMultiSetResult createHashMultiSetResult() { return new VectorMapJoinFastHashMultiSet.HashMultiSetResult(); } + @Override + public void putRow(BytesWritable currentKey, BytesWritable currentValue) + throws HiveException, IOException { + + if (!adaptPutRow(currentKey, currentValue)) { + + // Ignore NULL keys, except for FULL OUTER. + if (isSaveNullKeyValuesForFullOuter) { + fullOuterNullKeyValueCount++; + } + + } + } + /* * A Unit Test convenience method for putting the key into the hash table using the * actual type. @@ -80,12 +98,19 @@ public void assignSlot(int slot, long key, boolean isNewKey, BytesWritable curre optimizedHashMultiSetResult.forget(); long hashCode = HashCodeUtil.calculateLongHashCode(key); - long count = findReadSlot(key, hashCode); + int pairIndex = findReadSlot(key, hashCode); JoinUtil.JoinResult joinResult; - if (count == -1) { + if (pairIndex == -1) { joinResult = JoinUtil.JoinResult.NOMATCH; } else { - optimizedHashMultiSetResult.set(count); + /* + * NOTE: Support for trackMatched not needed yet for Set. 
+ + if (matchTracker != null) { + matchTracker.trackMatch(pairIndex / 2); + } + */ + optimizedHashMultiSetResult.set(slotPairs[pairIndex]); joinResult = JoinUtil.JoinResult.MATCH; } @@ -95,10 +120,14 @@ public void assignSlot(int slot, long key, boolean isNewKey, BytesWritable curre } public VectorMapJoinFastLongHashMultiSet( - boolean minMaxEnabled, boolean isOuterJoin, HashTableKeyType hashTableKeyType, + boolean minMaxEnabled, + boolean isSaveNullKeyValuesForFullOuter, + HashTableKeyType hashTableKeyType, int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { - super(minMaxEnabled, isOuterJoin, hashTableKeyType, + super(minMaxEnabled, hashTableKeyType, initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); + this.isSaveNullKeyValuesForFullOuter = isSaveNullKeyValuesForFullOuter; + fullOuterNullKeyValueCount = 0; } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java index 4c049cb..14b1965 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashSet.java @@ -47,6 +47,14 @@ public VectorMapJoinHashSetResult createHashSetResult() { return new VectorMapJoinFastHashSet.HashSetResult(); } + @Override + public void putRow(BytesWritable currentKey, BytesWritable currentValue) + throws HiveException, IOException { + + // Ignore NULL keys (HashSet not used for FULL OUTER). + adaptPutRow(currentKey, currentValue); + } + /* * A Unit Test convenience method for putting the key into the hash table using the * actual type. @@ -76,11 +84,18 @@ public JoinResult contains(long key, VectorMapJoinHashSetResult hashSetResult) { optimizedHashSetResult.forget(); long hashCode = HashCodeUtil.calculateLongHashCode(key); - long existance = findReadSlot(key, hashCode); + int pairIndex = findReadSlot(key, hashCode); JoinUtil.JoinResult joinResult; - if (existance == -1) { + if (pairIndex == -1) { joinResult = JoinUtil.JoinResult.NOMATCH; } else { + /* + * NOTE: Support for trackMatched not needed yet for Set. 
+ + if (matchTracker != null) { + matchTracker.trackMatch(pairIndex / 2); + } + */ joinResult = JoinUtil.JoinResult.MATCH; } @@ -91,9 +106,10 @@ public JoinResult contains(long key, VectorMapJoinHashSetResult hashSetResult) { } public VectorMapJoinFastLongHashSet( - boolean minMaxEnabled, boolean isOuterJoin, HashTableKeyType hashTableKeyType, + boolean minMaxEnabled, + HashTableKeyType hashTableKeyType, int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { - super(minMaxEnabled, isOuterJoin, hashTableKeyType, + super(minMaxEnabled, hashTableKeyType, initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java index c9c3e80..8b775fe 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastLongHashTable.java @@ -49,8 +49,6 @@ private final HashTableKeyType hashTableKeyType; - private final boolean isOuterJoin; - private final BinarySortableDeserializeRead keyBinarySortableDeserializeRead; private final boolean useMinMax; @@ -72,14 +70,13 @@ public long max() { return max; } - @Override - public void putRow(BytesWritable currentKey, BytesWritable currentValue) throws HiveException, IOException { + public boolean adaptPutRow(BytesWritable currentKey, BytesWritable currentValue) throws HiveException, IOException { byte[] keyBytes = currentKey.getBytes(); int keyLength = currentKey.getLength(); keyBinarySortableDeserializeRead.set(keyBytes, 0, keyLength); try { if (!keyBinarySortableDeserializeRead.readNextField()) { - return; + return false; } } catch (Exception e) { throw new HiveException( @@ -92,6 +89,7 @@ public void putRow(BytesWritable currentKey, BytesWritable currentValue) throws keyBinarySortableDeserializeRead, hashTableKeyType); add(key, currentValue); + return true; } protected abstract void assignSlot(int slot, long key, boolean isNewKey, BytesWritable currentValue); @@ -215,10 +213,9 @@ private void expandAndRehash() { largestNumberOfSteps = newLargestNumberOfSteps; resizeThreshold = (int)(logicalHashBucketCount * loadFactor); metricExpands++; - // LOG.debug("VectorMapJoinFastLongHashTable expandAndRehash new logicalHashBucketCount " + logicalHashBucketCount + " resizeThreshold " + resizeThreshold + " metricExpands " + metricExpands); } - protected long findReadSlot(long key, long hashCode) { + protected int findReadSlot(long key, long hashCode) { int intHashCode = (int) hashCode; int slot = intHashCode & logicalHashBucketMask; @@ -230,20 +227,16 @@ protected long findReadSlot(long key, long hashCode) { long valueRef = slotPairs[pairIndex]; if (valueRef == 0) { // Given that we do not delete, an empty slot means no match. - // LOG.debug("VectorMapJoinFastLongHashTable findReadSlot key " + key + " slot " + slot + " pairIndex " + pairIndex + " empty slot (i = " + i + ")"); return -1; } long tableKey = slotPairs[pairIndex + 1]; if (key == tableKey) { - // LOG.debug("VectorMapJoinFastLongHashTable findReadSlot key " + key + " slot " + slot + " pairIndex " + pairIndex + " found key (i = " + i + ")"); - return slotPairs[pairIndex]; + return pairIndex; } // Some other key (collision) - keep probing. 
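// Illustration (not part of the patch): findReadSlot now returns the slot-pair index rather
// than the stored value reference, so callers can both read slotPairs[pairIndex] and report
// matchTracker.trackMatch(pairIndex / 2). A standalone sketch of the probe loop below, under
// the assumption that mask is the table size minus one (size a power of two) and maxSteps is
// the largest probe count ever reached during insertion (largestNumberOfSteps above):
//
//   static int probe(long[] slotPairs, int mask, int maxSteps, long key, int startSlot) {
//     int slot = startSlot;
//     long probeSlot = slot;
//     int i = 0;
//     while (true) {
//       int pairIndex = 2 * slot;
//       if (slotPairs[pairIndex] == 0) {
//         return -1;                 // empty slot means no match, since nothing is deleted
//       }
//       if (slotPairs[pairIndex + 1] == key) {
//         return pairIndex;          // pairIndex / 2 is the logical slot for MatchTracker
//       }
//       probeSlot += (++i);          // triangular probing: cumulative offsets 1, 3, 6, 10, ...
//       if (i > maxSteps) {
//         return -1;                 // inserts never probed further, so the key is absent
//       }
//       slot = (int) (probeSlot & mask);
//     }
//   }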
probeSlot += (++i); if (i > largestNumberOfSteps) { - // LOG.debug("VectorMapJoinFastLongHashTable findReadSlot returning not found"); // We know we never went that far when we were inserting. - // LOG.debug("VectorMapJoinFastLongHashTable findReadSlot key " + key + " slot " + slot + " pairIndex " + pairIndex + " largestNumberOfSteps " + largestNumberOfSteps + " (i = " + i + ")"); return -1; } slot = (int)(probeSlot & logicalHashBucketMask); @@ -268,10 +261,10 @@ private void allocateBucketArray() { } public VectorMapJoinFastLongHashTable( - boolean minMaxEnabled, boolean isOuterJoin, HashTableKeyType hashTableKeyType, + boolean minMaxEnabled, + HashTableKeyType hashTableKeyType, int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { super(initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); - this.isOuterJoin = isOuterJoin; this.hashTableKeyType = hashTableKeyType; PrimitiveTypeInfo[] primitiveTypeInfos = { hashTableKeyType.getPrimitiveTypeInfo() }; keyBinarySortableDeserializeRead = diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java index 2798010..4a63772 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMap.java @@ -49,8 +49,8 @@ public void testPutRow(byte[] currentKey, byte[] currentValue) throws HiveExcept } public VectorMapJoinFastMultiKeyHashMap( - boolean isOuterJoin, - int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { + boolean isSaveNullKeyValuesForFullOuter, + int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { super(initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java index 0560281..31aa95f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashMultiSet.java @@ -47,8 +47,8 @@ public void testPutRow(byte[] currentKey) throws HiveException, IOException { } public VectorMapJoinFastMultiKeyHashMultiSet( - boolean isOuterJoin, - int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { + boolean isSaveNullKeyValuesForFullOuter, + int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { super(initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java index 900ca55..ed8b989 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastMultiKeyHashSet.java @@ -47,8 +47,7 @@ public void testPutRow(byte[] currentKey) throws HiveException, IOException { } public VectorMapJoinFastMultiKeyHashSet( - boolean isOuterJoin, - int initialCapacity, float loadFactor, 
int writeBuffersSize, long estimatedKeyCount) { + int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { super(initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastNonMatchedIterator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastNonMatchedIterator.java new file mode 100644 index 0000000..3d29cf4 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastNonMatchedIterator.java @@ -0,0 +1,40 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; + +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator; + +/** + * The abstract class for vectorized non-match Small Table key iteration. + */ +public abstract class VectorMapJoinFastNonMatchedIterator + extends VectorMapJoinNonMatchedIterator { + + protected int nonMatchedLogicalSlotNum; + + public VectorMapJoinFastNonMatchedIterator(MatchTracker matchTracker) { + super(matchTracker); + } + + @Override + public void init() { + nonMatchedLogicalSlotNum = -1; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringCommon.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringCommon.java index 777eb45..1b108a8 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringCommon.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringCommon.java @@ -35,11 +35,9 @@ public static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinFastStringCommon.class); - private boolean isOuterJoin; - private BinarySortableDeserializeRead keyBinarySortableDeserializeRead; - public void adaptPutRow(VectorMapJoinFastBytesHashTable hashTable, + public boolean adaptPutRow(VectorMapJoinFastBytesHashTable hashTable, BytesWritable currentKey, BytesWritable currentValue) throws HiveException, IOException { byte[] keyBytes = currentKey.getBytes(); @@ -47,7 +45,7 @@ public void adaptPutRow(VectorMapJoinFastBytesHashTable hashTable, keyBinarySortableDeserializeRead.set(keyBytes, 0, keyLength); try { if (!keyBinarySortableDeserializeRead.readNextField()) { - return; + return false; } } catch (Exception e) { throw new HiveException( @@ -61,14 +59,14 @@ public void adaptPutRow(VectorMapJoinFastBytesHashTable hashTable, keyBinarySortableDeserializeRead.currentBytesStart, keyBinarySortableDeserializeRead.currentBytesLength, currentValue); + return true; } - public 
VectorMapJoinFastStringCommon(boolean isOuterJoin) { - this.isOuterJoin = isOuterJoin; + public VectorMapJoinFastStringCommon() { PrimitiveTypeInfo[] primitiveTypeInfos = { TypeInfoFactory.stringTypeInfo }; keyBinarySortableDeserializeRead = new BinarySortableDeserializeRead( primitiveTypeInfos, /* useExternalBuffer */ false); } -} \ No newline at end of file +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java index fc4edda..56068f3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMap.java @@ -30,18 +30,27 @@ */ public class VectorMapJoinFastStringHashMap extends VectorMapJoinFastBytesHashMap { + private final boolean isSaveNullKeyValuesForFullOuter; + private VectorMapJoinFastStringCommon stringCommon; @Override public void putRow(BytesWritable currentKey, BytesWritable currentValue) throws HiveException, IOException { - stringCommon.adaptPutRow(this, currentKey, currentValue); + if (!stringCommon.adaptPutRow(this, currentKey, currentValue)) { + + // Ignore NULL keys, except for FULL OUTER. + if (isSaveNullKeyValuesForFullOuter) { + addFullOuterNullKeyValue(currentValue); + } + } } public VectorMapJoinFastStringHashMap( - boolean isOuterJoin, + boolean isSaveNullKeyValuesForFullOuter, int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { super(initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); - stringCommon = new VectorMapJoinFastStringCommon(isOuterJoin); + this.isSaveNullKeyValuesForFullOuter = isSaveNullKeyValuesForFullOuter; + stringCommon = new VectorMapJoinFastStringCommon(); } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java index 3dbdfa7..911a61e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashMultiSet.java @@ -30,18 +30,30 @@ */ public class VectorMapJoinFastStringHashMultiSet extends VectorMapJoinFastBytesHashMultiSet { - private VectorMapJoinFastStringCommon stringCommon; + private final boolean isSaveNullKeyValuesForFullOuter; + + private final VectorMapJoinFastStringCommon stringCommon; + + private long fullOuterNullKeyValueCount; @Override public void putRow(BytesWritable currentKey, BytesWritable currentValue) throws HiveException, IOException { - stringCommon.adaptPutRow(this, currentKey, currentValue); + if (!stringCommon.adaptPutRow(this, currentKey, currentValue)) { + + // Ignore NULL keys, except for FULL OUTER. 
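// Illustration (not part of the patch): adaptPutRow returns false when the key deserializes
// to NULL, and what happens next depends on the table kind. The pattern used across this
// patch, condensed into one hypothetical helper:
//
//   enum TableKind { HASH_MAP, HASH_MULTISET, HASH_SET }
//
//   static void onNullKey(TableKind kind, boolean isSaveNullKeyValuesForFullOuter) {
//     if (!isSaveNullKeyValuesForFullOuter) {
//       return;                      // non-FULL-OUTER joins: a NULL key can never match
//     }
//     switch (kind) {
//       case HASH_MAP:      break;   // addFullOuterNullKeyValue(value): keep the values
//       case HASH_MULTISET: break;   // fullOuterNullKeyValueCount++: keep only a count
//       case HASH_SET:      break;   // never reached; hash sets are not used for FULL OUTER
//     }
//   }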
+ if (isSaveNullKeyValuesForFullOuter) { + fullOuterNullKeyValueCount++; + } + } } public VectorMapJoinFastStringHashMultiSet( - boolean isOuterJoin, + boolean isSaveNullKeyValuesForFullOuter, int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { super(initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); - stringCommon = new VectorMapJoinFastStringCommon(isOuterJoin); + this.isSaveNullKeyValuesForFullOuter = isSaveNullKeyValuesForFullOuter; + fullOuterNullKeyValueCount = 0; + stringCommon = new VectorMapJoinFastStringCommon(); } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java index 84f8439..3dc7847 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastStringHashSet.java @@ -33,15 +33,17 @@ private VectorMapJoinFastStringCommon stringCommon; @Override - public void putRow(BytesWritable currentKey, BytesWritable currentValue) throws HiveException, IOException { + public void putRow(BytesWritable currentKey, BytesWritable currentValue) + throws HiveException, IOException { + + // Ignore NULL keys (HashSet not used for FULL OUTER). stringCommon.adaptPutRow(this, currentKey, currentValue); } public VectorMapJoinFastStringHashSet( - boolean isOuterJoin, int initialCapacity, float loadFactor, int writeBuffersSize, long estimatedKeyCount) { super(initialCapacity, loadFactor, writeBuffersSize, estimatedKeyCount); - stringCommon = new VectorMapJoinFastStringCommon(isOuterJoin); + stringCommon = new VectorMapJoinFastStringCommon(); } @Override diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java index 24dfa5d..3e41ec0 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VectorMapJoinFastTableContainer.java @@ -27,6 +27,8 @@ import org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.NonMatchedSmallTableIterator; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTable; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinTableContainer; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -73,11 +75,6 @@ public VectorMapJoinFastTableContainer(MapJoinDesc desc, Configuration hconf, this.estimatedKeyCount = estimatedKeyCount; - // LOG.info("VectorMapJoinFastTableContainer load keyCountAdj " + keyCountAdj); - // LOG.info("VectorMapJoinFastTableContainer load threshold " + threshold); - // LOG.info("VectorMapJoinFastTableContainer load loadFactor " + loadFactor); - // LOG.info("VectorMapJoinFastTableContainer load wbSize " + wbSize); - int newThreshold = HashMapWrapper.calculateTableSize( keyCountAdj, threshold, loadFactor, estimatedKeyCount); @@ -93,13 +90,11 @@ public VectorMapJoinHashTable vectorMapJoinHashTable() { private 
VectorMapJoinFastHashTable createHashTable(int newThreshold) { - boolean isOuterJoin = !desc.isNoOuterJoin(); - - // UNDONE VectorMapJoinDesc vectorDesc = (VectorMapJoinDesc) desc.getVectorDesc(); HashTableImplementationType hashTableImplementationType = vectorDesc.getHashTableImplementationType(); HashTableKind hashTableKind = vectorDesc.getHashTableKind(); HashTableKeyType hashTableKeyType = vectorDesc.getHashTableKeyType(); + boolean isSaveNullKeyValuesForFullOuter = vectorDesc.getIsSaveNullKeyValuesForFullOuter(); boolean minMaxEnabled = vectorDesc.getMinMaxEnabled(); int writeBufferSize = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHASHTABLEWBSIZE); @@ -115,18 +110,23 @@ private VectorMapJoinFastHashTable createHashTable(int newThreshold) { switch (hashTableKind) { case HASH_MAP: hashTable = new VectorMapJoinFastLongHashMap( - minMaxEnabled, isOuterJoin, hashTableKeyType, - newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); + minMaxEnabled, + isSaveNullKeyValuesForFullOuter, + hashTableKeyType, + newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); break; case HASH_MULTISET: hashTable = new VectorMapJoinFastLongHashMultiSet( - minMaxEnabled, isOuterJoin, hashTableKeyType, - newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); + minMaxEnabled, + isSaveNullKeyValuesForFullOuter, + hashTableKeyType, + newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); break; case HASH_SET: hashTable = new VectorMapJoinFastLongHashSet( - minMaxEnabled, isOuterJoin, hashTableKeyType, - newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); + minMaxEnabled, + hashTableKeyType, + newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); break; } break; @@ -135,18 +135,17 @@ private VectorMapJoinFastHashTable createHashTable(int newThreshold) { switch (hashTableKind) { case HASH_MAP: hashTable = new VectorMapJoinFastStringHashMap( - isOuterJoin, - newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); + isSaveNullKeyValuesForFullOuter, + newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); break; case HASH_MULTISET: hashTable = new VectorMapJoinFastStringHashMultiSet( - isOuterJoin, - newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); + isSaveNullKeyValuesForFullOuter, + newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); break; case HASH_SET: hashTable = new VectorMapJoinFastStringHashSet( - isOuterJoin, - newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); + newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); break; } break; @@ -155,18 +154,17 @@ private VectorMapJoinFastHashTable createHashTable(int newThreshold) { switch (hashTableKind) { case HASH_MAP: hashTable = new VectorMapJoinFastMultiKeyHashMap( - isOuterJoin, + isSaveNullKeyValuesForFullOuter, newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); break; case HASH_MULTISET: hashTable = new VectorMapJoinFastMultiKeyHashMultiSet( - isOuterJoin, - newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); + isSaveNullKeyValuesForFullOuter, + newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); break; case HASH_SET: hashTable = new VectorMapJoinFastMultiKeyHashSet( - isOuterJoin, - newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); + newThreshold, loadFactor, writeBufferSize, estimatedKeyCount); break; } break; @@ -195,6 +193,12 @@ public ReusableGetAdaptor createGetter(MapJoinKey keyTypeFromLoader) { } @Override + public NonMatchedSmallTableIterator createNonMatchedSmallTableIterator( + 
MatchTracker matchTracker) { + throw new RuntimeException("Not applicable"); + } + + @Override public void clear() { // Do nothing } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinBytesHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinBytesHashMap.java index 2408484..ae057fe 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinBytesHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinBytesHashMap.java @@ -21,6 +21,8 @@ import java.io.IOException; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; +import org.apache.hadoop.hive.serde2.WriteBuffers; /* * The interface for a single byte array key hash map lookup method. @@ -41,6 +43,9 @@ * The object to receive small table value(s) information on a MATCH. * Or, for SPILL, it has information on where to spill the big table row. * + * NOTE: Since the hash table can be shared, the hashMapResult serves as the non-shared + * private object for our accessing the hash table lookup values, etc. + * * @return * Whether the lookup was a match, no match, or spill (the partition with the key * is currently spilled). @@ -48,4 +53,28 @@ JoinUtil.JoinResult lookup(byte[] keyBytes, int keyStart, int keyLength, VectorMapJoinHashMapResult hashMapResult) throws IOException; + /* + * A version of lookup with match tracking. + * ... + * * @param matchTracker + * Optional key match tracking. + * + * NOTE: Since the hash table can be shared, the matchTracker serves as the non-shared + * private object for tracking our key matches in the hash table. + * ... + */ + JoinUtil.JoinResult lookup(byte[] keyBytes, int keyStart, int keyLength, + VectorMapJoinHashMapResult hashMapResult, MatchTracker matchTracker) throws IOException; + + /* + * Lookup a byte array key in the hash map for key match tracking purposes; no result. + * ... + * @param readPos + * @param matchTracker + * NOTE: Since the hash table can be shared, the readPos and matchTracker serve as + * non-shared private objects for looking up and tracking key matches in the hash table. + * ... + */ + void lookupNoResult(byte[] keyBytes, int keyStart, int keyLength, + WriteBuffers.Position readPos, MatchTracker matchTracker) throws IOException; } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinHashMap.java index 2d2490c..5762cff 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinHashMap.java @@ -30,5 +30,4 @@ * access spill information when the partition with the key is currently spilled. 
*/ VectorMapJoinHashMapResult createHashMapResult(); - } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinHashTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinHashTable.java index e49da04..820678b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinHashTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinHashTable.java @@ -21,6 +21,7 @@ import java.io.IOException; import org.apache.hadoop.hive.common.MemoryEstimate; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.io.BytesWritable; @@ -31,7 +32,6 @@ */ public interface VectorMapJoinHashTable extends MemoryEstimate { - /* * @param currentKey * The current key. @@ -45,4 +45,8 @@ void putRow(BytesWritable currentKey, BytesWritable currentValue) * Get hash table size */ int size(); + + MatchTracker createMatchTracker(); + + VectorMapJoinNonMatchedIterator createNonMatchedIterator(MatchTracker matchTracker); } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinLongHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinLongHashMap.java index ba68d35..c70d84a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinLongHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinLongHashMap.java @@ -21,6 +21,8 @@ import java.io.IOException; import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; +import org.apache.hadoop.hive.serde2.WriteBuffers; /* * The interface for a single long key hash map lookup method. @@ -43,4 +45,28 @@ */ JoinUtil.JoinResult lookup(long key, VectorMapJoinHashMapResult hashMapResult) throws IOException; + /* + * A version of lookup with match tracking. + * ... + * @param matchTracker + * Optional key match tracking. + * + * NOTE: Since the hash table can be shared, the matchTracker serves as the non-shared + * private object for tracking our key matches in the hash table. + * ... + */ + JoinUtil.JoinResult lookup(long key, VectorMapJoinHashMapResult hashMapResult, + MatchTracker matchTracker) throws IOException; + + /* + * Lookup a long key in the hash map for key match tracking purposes; no result. + * ... + * @param readPos + * @param matchTracker + * NOTE: Since the hash table can be shared, the readPos and matchTracker serve as + * non-shared private objects for looking up and tracking key matches in the hash table. + * ... 
+ */ + void lookupNoResult(long key, WriteBuffers.Position readPos, MatchTracker matchTracker) + throws IOException; } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinLongHashTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinLongHashTable.java index d0f9dcb..74cfb9c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinLongHashTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinLongHashTable.java @@ -27,5 +27,4 @@ boolean useMinMax(); long min(); long max(); - } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinNonMatchedIterator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinNonMatchedIterator.java new file mode 100644 index 0000000..9403bc2 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/hashtable/VectorMapJoinNonMatchedIterator.java @@ -0,0 +1,72 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable; + +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; +import org.apache.hadoop.hive.ql.metadata.HiveException; + +/* + * The abstract class for vectorized non-match Small Table key iteration. 
+ */ +public abstract class VectorMapJoinNonMatchedIterator { + + protected final MatchTracker matchTracker; + + protected int nonMatchedLogicalSlotNum; + + public VectorMapJoinNonMatchedIterator(MatchTracker matchTracker) { + this.matchTracker = matchTracker; + } + + public void init() { + nonMatchedLogicalSlotNum = -1; + } + + public boolean findNextNonMatched() { + throw new RuntimeException("Not implemented"); + } + + public boolean readNonMatchedLongKey() throws HiveException { + throw new RuntimeException("Not implemented"); + } + + public long getNonMatchedLongKey() throws HiveException { + throw new RuntimeException("Not implemented"); + } + + public boolean readNonMatchedBytesKey() throws HiveException { + throw new RuntimeException("Not implemented"); + } + + public byte[] getNonMatchedBytes() { + throw new RuntimeException("Not implemented"); + } + + public int getNonMatchedBytesOffset() { + throw new RuntimeException("Not implemented"); + } + + public int getNonMatchedBytesLength() { + throw new RuntimeException("Not implemented"); + } + + public VectorMapJoinHashMapResult getNonMatchedHashMapResult() { + throw new RuntimeException("Not implemented"); + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedCreateHashTable.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedCreateHashTable.java index f95cd76..21c355c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedCreateHashTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedCreateHashTable.java @@ -116,16 +116,4 @@ public static VectorMapJoinOptimizedHashTable createHashTable(MapJoinDesc desc, } return hashTable; } - - /* - @Override - public com.esotericsoftware.kryo.io.Output getHybridBigTableSpillOutput(int partitionId) { - - HybridHashTableContainer ht = (HybridHashTableContainer) mapJoinTableContainer; - - HashPartition hp = ht.getHashPartitions()[partitionId]; - - return hp.getMatchfileOutput(); - } - */ } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashMap.java index 9242702..9a99112 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashMap.java @@ -22,11 +22,16 @@ import org.apache.hadoop.hive.ql.exec.JoinUtil; import org.apache.hadoop.hive.ql.exec.persistence.BytesBytesMultiHashMap; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMap; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.WriteBuffers; import 
org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; public class VectorMapJoinOptimizedHashMap @@ -40,13 +45,18 @@ public VectorMapJoinHashMapResult createHashMapResult() { public static class HashMapResult extends VectorMapJoinHashMapResult { - private BytesBytesMultiHashMap.Result bytesBytesMultiHashMapResult; + private final BytesBytesMultiHashMap.Result bytesBytesMultiHashMapResult; public HashMapResult() { super(); bytesBytesMultiHashMapResult = new BytesBytesMultiHashMap.Result(); } + public HashMapResult(BytesBytesMultiHashMap.Result bytesBytesMultiHashMapResult) { + super(); + this.bytesBytesMultiHashMapResult = bytesBytesMultiHashMapResult; + } + public BytesBytesMultiHashMap.Result bytesBytesMultiHashMapResult() { return bytesBytesMultiHashMapResult; } @@ -106,7 +116,59 @@ public String toString() { public String getDetailedHashMapResultPositionString() { return "(Not supported yet)"; } - } + } + + protected static class NonMatchedBytesHashMapIterator + extends VectorMapJoinOptimizedNonMatchedIterator { + + private VectorMapJoinOptimizedHashMap hashMap; + + protected ByteSegmentRef keyRef; + + public NonMatchedBytesHashMapIterator(MatchTracker matchTracker, + VectorMapJoinOptimizedHashMap hashMap) { + super(matchTracker); + this.hashMap = hashMap; + } + + @Override + public void init() { + super.init(); + nonMatchedIterator = + ((MapJoinBytesTableContainer) hashMap.originalTableContainer). + createNonMatchedSmallTableIterator(matchTracker); + } + + public void doReadNonMatchedBytesKey() throws HiveException { + keyRef = nonMatchedIterator.getCurrentKeyAsRef(); + } + + @Override + public boolean readNonMatchedBytesKey() throws HiveException { + doReadNonMatchedBytesKey(); + return true; // We have not interpreted the bytes, so return true. 
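// Illustration (not part of the patch): once the big table has been drained, the iterator
// API added here is consumed roughly as below. emitNonMatchedRow is a hypothetical callback;
// the real driver is the FULL OUTER MapJoin operator, which is outside this hunk:
//
//   static void emitNonMatches(VectorMapJoinHashMap map, MatchTracker matchTracker)
//       throws HiveException {
//     VectorMapJoinNonMatchedIterator it = map.createNonMatchedIterator(matchTracker);
//     it.init();
//     while (it.findNextNonMatched()) {
//       if (it.readNonMatchedBytesKey()) {           // readNonMatchedLongKey() for long keys
//         byte[] keyBytes = it.getNonMatchedBytes(); // with the offset/length accessors
//         VectorMapJoinHashMapResult values = it.getNonMatchedHashMapResult();
//         // emitNonMatchedRow(keyBytes, values);    // big-table columns padded with NULLs
//       }
//     }
//   }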
+ } + + @Override + public byte[] getNonMatchedBytes() { + return keyRef.getBytes(); + } + + @Override + public int getNonMatchedBytesOffset() { + return (int) keyRef.getOffset(); + } + + @Override + public int getNonMatchedBytesLength() { + return keyRef.getLength(); + } + } + + @Override + public VectorMapJoinNonMatchedIterator createNonMatchedIterator(MatchTracker matchTracker) { + return new NonMatchedBytesHashMapIterator(matchTracker, this); + } @Override public JoinUtil.JoinResult lookup(byte[] keyBytes, int keyOffset, int keyLength, @@ -117,11 +179,32 @@ public String getDetailedHashMapResultPositionString() { JoinUtil.JoinResult joinResult = doLookup(keyBytes, keyOffset, keyLength, implementationHashMapResult.bytesBytesMultiHashMapResult(), - (VectorMapJoinHashTableResult) hashMapResult); + (VectorMapJoinHashTableResult) hashMapResult, null); return joinResult; } + @Override + public JoinUtil.JoinResult lookup(byte[] keyBytes, int keyOffset, int keyLength, + VectorMapJoinHashMapResult hashMapResult, MatchTracker matchTracker) throws IOException { + + HashMapResult implementationHashMapResult = (HashMapResult) hashMapResult; + + JoinUtil.JoinResult joinResult = + doLookup(keyBytes, keyOffset, keyLength, + implementationHashMapResult.bytesBytesMultiHashMapResult(), + (VectorMapJoinHashTableResult) hashMapResult, matchTracker); + + return joinResult; + } + + @Override + public void lookupNoResult(byte[] keyBytes, int keyOffset, int keyLength, + WriteBuffers.Position readPos, MatchTracker matchTracker) throws IOException { + + doLookupNoResult(keyBytes, keyOffset, keyLength, readPos, matchTracker); + } + public VectorMapJoinOptimizedHashMap( MapJoinTableContainer originalTableContainer, ReusableGetAdaptor hashMapRowGetter) { super(originalTableContainer, hashMapRowGetter); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashMultiSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashMultiSet.java index 9921a88..cfe128c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashMultiSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashMultiSet.java @@ -91,7 +91,7 @@ public void forget() { JoinUtil.JoinResult joinResult = doLookup(keyBytes, keyOffset, keyLength, implementationHashMultiSetResult.bytesBytesMultiHashMapResult(), - (VectorMapJoinHashTableResult) hashMultiSetResult); + (VectorMapJoinHashTableResult) hashMultiSetResult, null); return joinResult; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashSet.java index 122f881..8f53ada 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashSet.java @@ -66,7 +66,7 @@ public void forget() { JoinUtil.JoinResult joinResult = doLookup(keyBytes, keyOffset, keyLength, implementationHashSetResult.bytesBytesMultiHashMapResult(), - (VectorMapJoinHashTableResult) hashSetResult); + (VectorMapJoinHashTableResult) hashSetResult, null); return joinResult; } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashTable.java 
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashTable.java index 74887f7..fd183e1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashTable.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedHashTable.java @@ -25,14 +25,23 @@ import org.slf4j.LoggerFactory; import org.apache.hadoop.hive.ql.exec.JoinUtil; import org.apache.hadoop.hive.ql.exec.persistence.BytesBytesMultiHashMap; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.NonMatchedSmallTableIterator; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerDirectAccess; import org.apache.hadoop.hive.ql.exec.persistence.ReusableGetAdaptorDirectAccess; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashTable; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTable; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashTableResult; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.optimized.VectorMapJoinOptimizedHashMap.NonMatchedBytesHashMapIterator; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.WriteBuffers; +import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Writable; @@ -40,7 +49,8 @@ * Root interface for a vector map join hash table (which could be a hash map, hash multi-set, or * hash set). 
*/ -public abstract class VectorMapJoinOptimizedHashTable implements VectorMapJoinHashTable { +public abstract class VectorMapJoinOptimizedHashTable + implements VectorMapJoinHashTable, VectorMapJoinBytesHashTable { private static final Logger LOG = LoggerFactory.getLogger(VectorMapJoinOptimizedMultiKeyHashMap.class.getName()); @@ -55,6 +65,11 @@ } @Override + public VectorMapJoinNonMatchedIterator createNonMatchedIterator(MatchTracker matchTracker) { + throw new RuntimeException("Not implemented"); + } + + @Override public void putRow(BytesWritable currentKey, BytesWritable currentValue) throws SerDeException, HiveException, IOException { @@ -69,13 +84,13 @@ protected void putRowInternal(BytesWritable key, BytesWritable value) public JoinUtil.JoinResult doLookup(byte[] keyBytes, int keyOffset, int keyLength, BytesBytesMultiHashMap.Result bytesBytesMultiHashMapResult, - VectorMapJoinHashTableResult hashTableResult) { + VectorMapJoinHashTableResult hashTableResult, MatchTracker matchTracker) { hashTableResult.forget(); JoinUtil.JoinResult joinResult = adapatorDirectAccess.setDirect(keyBytes, keyOffset, keyLength, - bytesBytesMultiHashMapResult); + bytesBytesMultiHashMapResult, matchTracker); if (joinResult == JoinUtil.JoinResult.SPILL) { hashTableResult.setSpillPartitionId(adapatorDirectAccess.directSpillPartitionId()); } @@ -85,6 +100,13 @@ protected void putRowInternal(BytesWritable key, BytesWritable value) return joinResult; } + public void doLookupNoResult(byte[] keyBytes, int keyOffset, int keyLength, + WriteBuffers.Position readPos, MatchTracker matchTracker) { + + adapatorDirectAccess.setDirectNoResult( + keyBytes, keyOffset, keyLength, readPos, matchTracker); + } + public VectorMapJoinOptimizedHashTable( MapJoinTableContainer originalTableContainer, ReusableGetAdaptor hashMapRowGetter) { @@ -105,4 +127,9 @@ public long getEstimatedMemorySize() { size += (2 * JavaDataModel.get().object()); return size; } + + @Override + public MatchTracker createMatchTracker() { + return adapatorDirectAccess.createMatchTracker(); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongCommon.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongCommon.java index 9c45ed9..de1ee15 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongCommon.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongCommon.java @@ -41,8 +41,6 @@ private HashTableKeyType hashTableKeyType; - // private BinarySortableDeserializeRead keyBinarySortableDeserializeRead; - private BinarySortableSerializeWrite keyBinarySortableSerializeWrite; private transient Output output; diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongHashMap.java index b21f0b3..35cfc57 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedLongHashMap.java @@ -21,11 +21,20 @@ import java.io.IOException; import org.apache.hadoop.hive.ql.exec.JoinUtil.JoinResult; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; import 
org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinLongHashMap; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator; +import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; +import org.apache.hadoop.hive.serde2.WriteBuffers; +import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef; +import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; /* * An single long value hash map based on the BytesBytesMultiHashMap. @@ -37,8 +46,104 @@ extends VectorMapJoinOptimizedHashMap implements VectorMapJoinLongHashMap { + private HashTableKeyType hashTableKeyType; + private VectorMapJoinOptimizedLongCommon longCommon; + private static class NonMatchedLongHashMapIterator + extends VectorMapJoinOptimizedNonMatchedIterator { + + private VectorMapJoinOptimizedLongHashMap hashMap; + + // Extract long with non-shared deserializer object. + private BinarySortableDeserializeRead keyBinarySortableDeserializeRead; + + private long longValue; + + NonMatchedLongHashMapIterator(MatchTracker matchTracker, + VectorMapJoinOptimizedLongHashMap hashMap) { + super(matchTracker); + this.hashMap = hashMap; + } + + @Override + public void init() { + super.init(); + nonMatchedIterator = + ((MapJoinBytesTableContainer) hashMap.originalTableContainer). + createNonMatchedSmallTableIterator(matchTracker); + + TypeInfo integerTypeInfo; + switch (hashMap.hashTableKeyType) { + case BOOLEAN: + integerTypeInfo = TypeInfoFactory.booleanTypeInfo; + break; + case BYTE: + integerTypeInfo = TypeInfoFactory.byteTypeInfo; + break; + case SHORT: + integerTypeInfo = TypeInfoFactory.shortTypeInfo; + break; + case INT: + integerTypeInfo = TypeInfoFactory.intTypeInfo; + break; + case LONG: + integerTypeInfo = TypeInfoFactory.longTypeInfo; + break; + default: + throw new RuntimeException("Unexpected key type " + hashMap.hashTableKeyType); + } + keyBinarySortableDeserializeRead = + new BinarySortableDeserializeRead( + new TypeInfo[] {integerTypeInfo}, false); + } + + private boolean readNonMatchedLongKey(ByteSegmentRef keyRef) throws HiveException { + + try { + byte[] keyBytes = keyRef.getBytes(); + int keyOffset = (int) keyRef.getOffset(); + int keyLength = keyRef.getLength(); + keyBinarySortableDeserializeRead.set(keyBytes, keyOffset, keyLength); + if (!keyBinarySortableDeserializeRead.readNextField()) { + return false; + } + switch (hashMap.hashTableKeyType) { + case BOOLEAN: + longValue = keyBinarySortableDeserializeRead.currentBoolean ? 
1 : 0; + break; + case BYTE: + longValue = keyBinarySortableDeserializeRead.currentByte; + break; + case SHORT: + longValue = keyBinarySortableDeserializeRead.currentShort; + break; + case INT: + longValue = keyBinarySortableDeserializeRead.currentInt; + break; + case LONG: + longValue = keyBinarySortableDeserializeRead.currentLong; + break; + default: + throw new RuntimeException("Unexpected key type " + hashMap.hashTableKeyType); + } + } catch (IOException e) { + throw new HiveException(e); + } + return true; + } + + @Override + public boolean readNonMatchedLongKey() throws HiveException { + return readNonMatchedLongKey(nonMatchedIterator.getCurrentKeyAsRef()); + } + + @Override + public long getNonMatchedLongKey() throws HiveException { + return longValue; + } + } + @Override public boolean useMinMax() { return longCommon.useMinMax(); @@ -54,14 +159,10 @@ public long max() { return longCommon.max(); } - /* @Override - public void putRow(BytesWritable currentKey, BytesWritable currentValue) - throws SerDeException, HiveException, IOException { - - longCommon.adaptPutRow((VectorMapJoinOptimizedHashTable) this, currentKey, currentValue); + public VectorMapJoinNonMatchedIterator createNonMatchedIterator(MatchTracker matchTracker) { + return new NonMatchedLongHashMapIterator(matchTracker, this); } - */ @Override public JoinResult lookup(long key, @@ -73,10 +174,31 @@ public JoinResult lookup(long key, hashMapResult); } + @Override + public JoinResult lookup(long key, + VectorMapJoinHashMapResult hashMapResult, MatchTracker matchTracker) throws IOException { + + SerializedBytes serializedBytes = longCommon.serialize(key); + + return super.lookup(serializedBytes.bytes, serializedBytes.offset, serializedBytes.length, + hashMapResult, matchTracker); + } + + @Override + public void lookupNoResult(long key, WriteBuffers.Position readPos, + MatchTracker matchTracker) throws IOException { + + SerializedBytes serializedBytes = longCommon.serialize(key); + + super.lookupNoResult(serializedBytes.bytes, serializedBytes.offset, serializedBytes.length, + readPos, matchTracker); + } + public VectorMapJoinOptimizedLongHashMap( boolean minMaxEnabled, boolean isOuterJoin, HashTableKeyType hashTableKeyType, MapJoinTableContainer originalTableContainer, ReusableGetAdaptor hashMapRowGetter) { super(originalTableContainer, hashMapRowGetter); + this.hashTableKeyType = hashTableKeyType; longCommon = new VectorMapJoinOptimizedLongCommon(minMaxEnabled, isOuterJoin, hashTableKeyType); } } \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedMultiKeyHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedMultiKeyHashMap.java index 3e8e6fb..e07bbaa 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedMultiKeyHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedMultiKeyHashMap.java @@ -27,8 +27,6 @@ public class VectorMapJoinOptimizedMultiKeyHashMap extends VectorMapJoinOptimizedHashMap { - // UNDONE: How to look for all NULLs in a multi-key????? Let nulls through for now. 
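// Illustration (not part of the patch): the key-reading switch in NonMatchedLongHashMapIterator
// above widens every integer-family key to a single long, which is why one iterator covers the
// BOOLEAN, BYTE, SHORT, INT, and LONG variants of HashTableKeyType. The same widening, condensed:
//
//   static long widen(Object key) {
//     if (key instanceof Boolean) {
//       return ((Boolean) key) ? 1 : 0;             // booleans become 0 or 1
//     }
//     if (key instanceof Byte || key instanceof Short
//         || key instanceof Integer || key instanceof Long) {
//       return ((Number) key).longValue();          // sign-extending widening conversions
//     }
//     throw new IllegalArgumentException("Unexpected key type " + key.getClass());
//   }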
- public VectorMapJoinOptimizedMultiKeyHashMap(boolean isOuterJoin, MapJoinTableContainer originalTableContainer, ReusableGetAdaptor hashMapRowGetter) { super(originalTableContainer, hashMapRowGetter); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedNonMatchedIterator.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedNonMatchedIterator.java new file mode 100644 index 0000000..694a8c9 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedNonMatchedIterator.java @@ -0,0 +1,55 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin.optimized; + +import org.apache.hadoop.hive.ql.exec.JoinUtil.JoinResult; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.NonMatchedSmallTableIterator; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.optimized.VectorMapJoinOptimizedHashMap.HashMapResult; + +/* + * The abstract class for vectorized non-match Small Table key iteration. + */ +public abstract class VectorMapJoinOptimizedNonMatchedIterator + extends VectorMapJoinNonMatchedIterator { + + protected NonMatchedSmallTableIterator nonMatchedIterator; + + protected HashMapResult nonMatchedHashMapResult; + + public VectorMapJoinOptimizedNonMatchedIterator(MatchTracker matchTracker) { + super(matchTracker); + } + + @Override + public boolean findNextNonMatched() { + return nonMatchedIterator.isNext(); + } + + @Override + public VectorMapJoinHashMapResult getNonMatchedHashMapResult() { + if (nonMatchedHashMapResult == null) { + nonMatchedHashMapResult = new HashMapResult(nonMatchedIterator.getHashMapResult()); + } + nonMatchedHashMapResult.setJoinResult(JoinResult.MATCH); + return nonMatchedHashMapResult; + } +} \ No newline at end of file diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedStringCommon.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedStringCommon.java index a8ccfa4..fd8f116 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedStringCommon.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedStringCommon.java @@ -26,19 +26,9 @@ /* * An single byte array value hash map based on the BytesBytesMultiHashMap. 
- * - * Since BytesBytesMultiHashMap does not interpret the key as BinarySortable we optimize - * this case and just reference the byte array key directly for the lookup instead of serializing - * the byte array into BinarySortable. We rely on it just doing byte array equality comparisons. */ public class VectorMapJoinOptimizedStringCommon { - // private boolean isOuterJoin; - - // private BinarySortableDeserializeRead keyBinarySortableDeserializeRead; - - // private ReadStringResults readStringResults; - private BinarySortableSerializeWrite keyBinarySortableSerializeWrite; private transient Output output; @@ -55,15 +45,10 @@ public SerializedBytes serialize(byte[] keyBytes, int keyStart, int keyLength) t serializedBytes.length = output.getLength(); return serializedBytes; - } public VectorMapJoinOptimizedStringCommon(boolean isOuterJoin) { - // this.isOuterJoin = isOuterJoin; - // PrimitiveTypeInfo[] primitiveTypeInfos = { TypeInfoFactory.stringTypeInfo }; - // keyBinarySortableDeserializeRead = new BinarySortableDeserializeRead(primitiveTypeInfos); - // readStringResults = keyBinarySortableDeserializeRead.createReadStringResults(); - // bytesWritable = new BytesWritable(); + keyBinarySortableSerializeWrite = new BinarySortableSerializeWrite(1); output = new Output(); keyBinarySortableSerializeWrite.set(output); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedStringHashMap.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedStringHashMap.java index f2074ec..b822005 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedStringHashMap.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedStringHashMap.java @@ -22,12 +22,19 @@ import org.apache.hadoop.hive.ql.exec.JoinUtil.JoinResult; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer; +import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMap; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.serde2.WriteBuffers; +import org.apache.hadoop.hive.serde2.binarysortable.fast.BinarySortableDeserializeRead; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; /* - * An multi-key hash map based on the BytesBytesMultiHashMap. + * A string hash map based on the BytesBytesMultiHashMap.
*/ public class VectorMapJoinOptimizedStringHashMap extends VectorMapJoinOptimizedHashMap @@ -35,14 +42,59 @@ private VectorMapJoinOptimizedStringCommon stringCommon; - /* - @Override - public void putRow(BytesWritable currentKey, BytesWritable currentValue) - throws SerDeException, HiveException, IOException { + private static class NonMatchedStringHashMapIterator extends NonMatchedBytesHashMapIterator { + + private BinarySortableDeserializeRead keyBinarySortableDeserializeRead; + + public NonMatchedStringHashMapIterator(MatchTracker matchTracker, + VectorMapJoinOptimizedStringHashMap hashMap) { + super(matchTracker, hashMap); + } + + @Override + public void init() { + super.init(); + + TypeInfo[] typeInfos = new TypeInfo[] { TypeInfoFactory.stringTypeInfo }; + keyBinarySortableDeserializeRead = + new BinarySortableDeserializeRead(typeInfos, /* useExternalBuffer */ false); + } + + @Override + public boolean readNonMatchedBytesKey() throws HiveException { + super.doReadNonMatchedBytesKey(); + + byte[] bytes = keyRef.getBytes(); + final int keyOffset = (int) keyRef.getOffset(); + final int keyLength = keyRef.getLength(); + try { + keyBinarySortableDeserializeRead.set(bytes, keyOffset, keyLength); + return keyBinarySortableDeserializeRead.readNextField(); + } catch (IOException e) { + throw new HiveException(e); + } + } - stringCommon.adaptPutRow((VectorMapJoinOptimizedHashTable) this, currentKey, currentValue); + @Override + public byte[] getNonMatchedBytes() { + return keyBinarySortableDeserializeRead.currentBytes; + } + + @Override + public int getNonMatchedBytesOffset() { + return keyBinarySortableDeserializeRead.currentBytesStart; + } + + @Override + public int getNonMatchedBytesLength() { + return keyBinarySortableDeserializeRead.currentBytesLength; + } + } + + @Override + public VectorMapJoinNonMatchedIterator createNonMatchedIterator(MatchTracker matchTracker) { + return new NonMatchedStringHashMapIterator(matchTracker, this); } - */ @Override public JoinResult lookup(byte[] keyBytes, int keyStart, int keyLength, @@ -55,6 +107,27 @@ public JoinResult lookup(byte[] keyBytes, int keyStart, int keyLength, } + @Override + public JoinResult lookup(byte[] keyBytes, int keyStart, int keyLength, + VectorMapJoinHashMapResult hashMapResult, MatchTracker matchTracker) throws IOException { + + SerializedBytes serializedBytes = stringCommon.serialize(keyBytes, keyStart, keyLength); + + return super.lookup(serializedBytes.bytes, serializedBytes.offset, serializedBytes.length, + hashMapResult, matchTracker); + + } + + @Override + public void lookupNoResult(byte[] keyBytes, int keyOffset, int keyLength, + WriteBuffers.Position readPos, MatchTracker matchTracker) throws IOException { + + SerializedBytes serializedBytes = stringCommon.serialize(keyBytes, keyOffset, keyLength); + + doLookupNoResult(serializedBytes.bytes, serializedBytes.offset, serializedBytes.length, + readPos, matchTracker); + } + public VectorMapJoinOptimizedStringHashMap(boolean isOuterJoin, MapJoinTableContainer originalTableContainer, ReusableGetAdaptor hashMapRowGetter) { super(originalTableContainer, hashMapRowGetter); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedStringHashMultiSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedStringHashMultiSet.java index a6b754c..bb2fd03 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedStringHashMultiSet.java +++ 
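Both lookup overrides in this class first push the raw string bytes through the same BinarySortable serializer that wrote the keys at build time, because the underlying byte map only compares bytes; conversely, the non-matched iterator has to run the inverse deserialization to recover the original string bytes. Below is a toy round-trip showing why the probe and build sides must share one encoding; the length-prefixed format is an invented stand-in, not the real BinarySortable layout.

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;

public class KeyEncodingRoundTripSketch {
  // Stand-in encoder: the patch uses BinarySortableSerializeWrite here.
  static byte[] encode(String key) {
    byte[] utf8 = key.getBytes(StandardCharsets.UTF_8);
    return ByteBuffer.allocate(4 + utf8.length).putInt(utf8.length).put(utf8).array();
  }

  // Stand-in decoder: the patch uses BinarySortableDeserializeRead here.
  static String decode(byte[] encoded) {
    ByteBuffer buf = ByteBuffer.wrap(encoded);
    byte[] utf8 = new byte[buf.getInt()];
    buf.get(utf8);
    return new String(utf8, StandardCharsets.UTF_8);
  }

  public static void main(String[] args) {
    // A byte-equality map, like the BytesBytesMultiHashMap underneath.
    Map<String, String> table = new HashMap<>();
    table.put(Arrays.toString(encode("widget")), "row-1");

    // The probe side must encode with the same writer, or the bytes never match.
    System.out.println(table.get(Arrays.toString(encode("widget")))); // row-1

    // Non-matched iteration walks stored keys and decodes them back.
    System.out.println(decode(encode("widget")));                     // widget
  }
}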
ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedStringHashMultiSet.java @@ -25,6 +25,7 @@ import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashMultiSet; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMultiSetResult; +import org.apache.hadoop.hive.ql.metadata.HiveException; /* * An multi-key hash map based on the BytesBytesMultiHashMultiSet. diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedStringHashSet.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedStringHashSet.java index fdcd83d..229dadf 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedStringHashSet.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/mapjoin/optimized/VectorMapJoinOptimizedStringHashSet.java @@ -25,6 +25,7 @@ import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinBytesHashSet; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashSetResult; +import org.apache.hadoop.hive.ql.metadata.HiveException; /* * An multi-key hash map based on the BytesBytesMultiHashSet. diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java index a235f3f..80c0bea 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ConvertJoinMapJoin.java @@ -46,6 +46,7 @@ import org.apache.hadoop.hive.ql.exec.SelectOperator; import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.TezDummyStoreOperator; +import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinBaseOperator.MapJoinBigTableInfo; import org.apache.hadoop.hive.ql.lib.Node; import org.apache.hadoop.hive.ql.lib.NodeProcessor; import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; @@ -83,7 +84,6 @@ private static final Logger LOG = LoggerFactory.getLogger(ConvertJoinMapJoin.class.getName()); - @Override /* * (non-Javadoc) we should ideally not modify the tree we traverse. However, @@ -119,6 +119,7 @@ boolean hiveConvertJoin = context.conf.getBoolVar(HiveConf.ConfVars.HIVECONVERTJOIN) & !context.parseContext.getDisableMapJoin(); if (!hiveConvertJoin) { + // we are just converting to a common merge join operator. The shuffle // join in map-reduce case. 
Object retval = checkAndConvertSMBJoin(context, joinOp, tezBucketJoinProcCtx, maxSize); @@ -182,6 +183,18 @@ // reduced by 1 mapJoinOp.setOpTraits(new OpTraits(null, -1, null, joinOp.getOpTraits().getNumReduceSinks())); mapJoinOp.setStatistics(joinOp.getStatistics()); + + JoinCondDesc[] conds = joinOp.getConf().getConds(); + if (conds.length == 1 && conds[0].getType() == JoinDesc.FULL_OUTER_JOIN) { + MapJoinDesc mapJoinDesc = mapJoinOp.getConf(); + + FullOuterMapJoinOptimization.removeFilterMap(mapJoinDesc); + + if (!mapJoinDesc.isDynamicPartitionHashJoin()) { + FullOuterMapJoinOptimization.generateSharedMemoryPlan(mapJoinOp); + } + } + // propagate this change till the next RS for (Operator childOp : mapJoinOp.getChildOperators()) { setAllChildrenTraits(childOp, mapJoinOp.getOpTraits()); @@ -741,6 +754,7 @@ private boolean isCrossProduct(JoinOperator joinOp) { public int getMapJoinConversionPos(JoinOperator joinOp, OptimizeTezProcContext context, int buckets, boolean skipJoinTypeChecks, long maxSize, boolean checkMapJoinThresholds) throws SemanticException { + JoinDesc joinDesc = joinOp.getConf(); if (!skipJoinTypeChecks) { /* * HIVE-9038: Join tests fail in tez when we have more than 1 join on the same key and there is @@ -749,14 +763,16 @@ public int getMapJoinConversionPos(JoinOperator joinOp, OptimizeTezProcContext c * new operation to be able to support this. This seems like a corner case enough to special * case this for now. */ - if (joinOp.getConf().getConds().length > 1) { + if (joinDesc.getConds().length > 1) { if (hasOuterJoin(joinOp)) { return -1; } } } + boolean isEnableFullOuterMapJoin = + MapJoinProcessor.determineEnableFullOuterMapJoin(context.conf, joinDesc); Set bigTableCandidateSet = - MapJoinProcessor.getBigTableCandidates(joinOp.getConf().getConds()); + MapJoinProcessor.getBigTableCandidates(joinDesc.getConds(), isEnableFullOuterMapJoin); int bigTablePosition = -1; // big input cumulative row count long bigInputCumulativeCardinality = -1L; @@ -937,13 +953,13 @@ public MapJoinOperator convertJoinMapJoin(JoinOperator joinOp, OptimizeTezProcCo MapJoinProcessor.convertJoinOpMapJoinOp(context.conf, joinOp, joinOp.getConf().isLeftInputJoin(), joinOp.getConf().getBaseSrc(), joinOp.getConf().getMapAliases(), bigTablePosition, true, removeReduceSink); - mapJoinOp.getConf().setHybridHashJoin(HiveConf.getBoolVar(context.conf, + MapJoinDesc mapJoinDesc = mapJoinOp.getConf(); + mapJoinDesc.setHybridHashJoin(HiveConf.getBoolVar(context.conf, HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN)); - List joinExprs = mapJoinOp.getConf().getKeys().values().iterator().next(); + List joinExprs = mapJoinDesc.getKeys().values().iterator().next(); if (joinExprs.size() == 0) { // In case of cross join, we disable hybrid grace hash join - mapJoinOp.getConf().setHybridHashJoin(false); + mapJoinDesc.setHybridHashJoin(false); } - Operator parentBigTableOp = mapJoinOp.getParentOperators().get(bigTablePosition); if (parentBigTableOp instanceof ReduceSinkOperator) { @@ -1159,7 +1175,12 @@ private boolean convertJoinDynamicPartitionedHashJoin(JoinOperator joinOp, Optim MapJoinOperator mapJoinOp = convertJoinMapJoin(joinOp, context, bigTablePos, false); if (mapJoinOp != null) { LOG.info("Selected dynamic partitioned hash join"); - mapJoinOp.getConf().setDynamicPartitionHashJoin(true); + MapJoinDesc mapJoinDesc = mapJoinOp.getConf(); + mapJoinDesc.setDynamicPartitionHashJoin(true); + JoinCondDesc[] conds = mapJoinDesc.getConds(); + if (conds.length == 1 && conds[0].getType() == JoinDesc.FULL_OUTER_JOIN) { + 
FullOuterMapJoinOptimization.removeFilterMap(mapJoinDesc); + } // Set OpTraits for dynamically partitioned hash join: // bucketColNames: Re-use previous joinOp's bucketColNames. Parent operators should be // reduce sink, which should have bucket columns based on the join keys. @@ -1225,8 +1246,9 @@ private boolean checkNumberOfEntriesForHashTable(JoinOperator joinOp, int positi List columnStats = new ArrayList<>(); for (String key : keys) { ColStatistics cs = inputStats.getColumnStatisticsFromColName(key); + LOG.debug("Statistics obtained for {} of reduce sink operator {}: {}", + key, rsOp.toString(), (cs != null)); if (cs == null) { - LOG.debug("Couldn't get statistics for: {}", key); return true; } columnStats.add(cs); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/FullOuterMapJoinOptimization.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/FullOuterMapJoinOptimization.java new file mode 100644 index 0000000..8b7b9c1 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/FullOuterMapJoinOptimization.java @@ -0,0 +1,438 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
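Taken together, the two ConvertJoinMapJoin call sites above gate this optimization on exactly one join condition of type FULL OUTER, and they skip the shared-memory rewrite when dynamic partition hash join is in effect. A compact restatement of that decision follows, with invented enum names standing in for the Hive types; it is a sketch of the gating logic, not the implementation.

public class FullOuterGateSketch {
  enum JoinType { INNER, LEFT_OUTER, RIGHT_OUTER, FULL_OUTER }
  enum PlanChoice { NO_REWRITE, REMOVE_FILTER_MAP_ONLY, REMOVE_FILTER_MAP_AND_SHARED_MEMORY }

  static PlanChoice choose(JoinType[] conds, boolean dynamicPartitionHashJoin) {
    // Only a single-condition FULL OUTER join qualifies at all.
    if (conds.length != 1 || conds[0] != JoinType.FULL_OUTER) {
      return PlanChoice.NO_REWRITE;
    }
    // Dynamic partition hash join keeps its shape; it only drops the filter map.
    return dynamicPartitionHashJoin
        ? PlanChoice.REMOVE_FILTER_MAP_ONLY
        : PlanChoice.REMOVE_FILTER_MAP_AND_SHARED_MEMORY;
  }

  public static void main(String[] args) {
    System.out.println(choose(new JoinType[] { JoinType.FULL_OUTER }, false));
    System.out.println(choose(new JoinType[] { JoinType.FULL_OUTER }, true));
    System.out.println(choose(new JoinType[] { JoinType.LEFT_OUTER }, false));
  }
}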
+ */ + +package org.apache.hadoop.hive.ql.optimizer; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.TreeMap; + +import org.apache.hadoop.hive.ql.exec.ColumnInfo; +import org.apache.hadoop.hive.ql.exec.MapJoinOperator; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.OperatorFactory; +import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; +import org.apache.hadoop.hive.ql.exec.RowSchema; +import org.apache.hadoop.hive.ql.exec.SelectOperator; +import org.apache.hadoop.hive.ql.exec.UnionOperator; +import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinBaseOperator; +import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinBaseOperator.MapJoinBigTableInfo; +import org.apache.hadoop.hive.ql.io.AcidUtils.Operation; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.MapJoinDesc; +import org.apache.hadoop.hive.ql.plan.OperatorDesc; +import org.apache.hadoop.hive.ql.plan.PlanUtils; +import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc; +import org.apache.hadoop.hive.ql.plan.SelectDesc; +import org.apache.hadoop.hive.ql.plan.TableDesc; +import org.apache.hadoop.hive.ql.plan.UnionDesc; +import org.apache.hadoop.hive.serde.serdeConstants; +import org.apache.hadoop.hive.serde2.SerDeUtils; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; + +import com.google.common.base.Preconditions; +import com.google.common.base.Joiner; + +public class FullOuterMapJoinOptimization { + + /* + * Add 2nd ReduceSink for sending Small Table to FULL OUTER INTERSECT MapJoin, too. + */ + private static ReduceSinkOperator addIntersectSmallTableReduceSink( + MapJoinOperator mapJoinOp, int posSmallTable) + throws SemanticException { + + List> mapJoinParents = mapJoinOp.getParentOperators(); + Preconditions.checkState(mapJoinParents.get(posSmallTable) instanceof ReduceSinkOperator); + + ReduceSinkOperator smallTableReduceSink = + (ReduceSinkOperator) mapJoinOp.getParentOperators().get(posSmallTable); + + List> smallTableReduceSinkParents = + smallTableReduceSink.getParentOperators(); + Preconditions.checkState(smallTableReduceSinkParents.size() == 1); + + Operator smallTableReduceSinkParent = + smallTableReduceSinkParents.get(0); + + ReduceSinkDesc intersectSmallTableReduceSinkDesc = + (ReduceSinkDesc) smallTableReduceSink.getConf().clone(); + intersectSmallTableReduceSinkDesc.setPartitionCols(new ArrayList()); + intersectSmallTableReduceSinkDesc.setNumReducers(1); + intersectSmallTableReduceSinkDesc.setOutputName("intersect"); + + ReduceSinkOperator intersectSmallTableReduceSink = + (ReduceSinkOperator) OperatorFactory.get( + smallTableReduceSink.getCompilationOpContext(), + intersectSmallTableReduceSinkDesc); + intersectSmallTableReduceSink.setColumnExprMap(new HashMap()); + + // Connect smallTableReduceSinkParent and intersectSmallTableReduceSink. + smallTableReduceSinkParent.getChildOperators().add(intersectSmallTableReduceSink); + intersectSmallTableReduceSink.getParentOperators().add(smallTableReduceSinkParent); + + return intersectSmallTableReduceSink; + } + + /* + * Create FULL OUTER INTERSECT MapJoin. 
+ */ + private static MapJoinOperator createIntersectMapJoin( + ReduceSinkOperator intersectSmallTableReduceSink, + List> intersectMapJoinParents, + MapJoinOperator mapJoinOp, MapJoinDesc mapJoinDesc, int posSmallTable) + throws SemanticException { + + MapJoinDesc intersectMapJoinDesc = new MapJoinDesc(mapJoinDesc); + intersectMapJoinDesc.setStatistics(mapJoinDesc.getStatistics()); + intersectMapJoinDesc.setTagOrder(mapJoinDesc.getTagOrder()); + intersectMapJoinDesc.setNullSafes(mapJoinDesc.getNullSafes()); + intersectMapJoinDesc.setFilterMap(mapJoinDesc.getFilterMap()); + + intersectMapJoinDesc.setResidualFilterExprs(mapJoinDesc.getResidualFilterExprs()); + intersectMapJoinDesc.setColumnExprMap(mapJoinDesc.getColumnExprMap()); + + intersectMapJoinDesc.setFullOuterIntersect(true); + + MapJoinOperator intersectMapJoinOp = + (MapJoinOperator) OperatorFactory.get( + mapJoinOp.getCompilationOpContext(), + intersectMapJoinDesc); + + // Make intersectMapJoin a child of intersectSmallTableReduceSink. + intersectSmallTableReduceSink.getChildOperators().add(intersectMapJoinOp); + + intersectMapJoinParents.set(posSmallTable, intersectSmallTableReduceSink); + + return intersectMapJoinOp; + } + + /* + * Create auxiliary ReduceSink that sends first-time key matches from FULL OUTER MapJoin to + * FULL OUTER INTERSECT MapJoin. + */ + private static ReduceSinkOperator addAuxiliaryReduceSink( + MapJoinOperator mapJoinOp, MapJoinDesc mapJoinDesc, + MapJoinBigTableInfo mapJoinBigTableInfo) + throws SemanticException { + + // Get the column names of the aggregations for reduce sink + List mapJoinOutputNames = mapJoinOp.getConf().getOutputColumnNames(); + + ArrayList mapJoinSignature = mapJoinOp.getSchema().getSignature(); + + ArrayList auxiliaryReduceSinkKeyExprs = new ArrayList(); + int[] bigTableOutputKeyColumnNums = mapJoinBigTableInfo.getOutputKeyColumnNums(); + final int bigTableOutputKeySize = bigTableOutputKeyColumnNums.length; + for (int i = 0; i < bigTableOutputKeySize; i++) { + final int bigTableOutputKeyColumnNum = bigTableOutputKeyColumnNums[i]; + ExprNodeColumnDesc colExpr = + new ExprNodeColumnDesc( + mapJoinSignature.get(bigTableOutputKeyColumnNum).getType(), + mapJoinOutputNames.get(bigTableOutputKeyColumnNum), "", false); + auxiliaryReduceSinkKeyExprs.add(colExpr); + } + + ArrayList auxiliaryReduceSinkValueExprs = new ArrayList(); + List auxiliaryValueOutputColumnNames = new ArrayList(); + int[] bigTableOutputValueColumnNums = mapJoinBigTableInfo.getOutputValueColumnNums(); + final int bigTableOutputValueSize = bigTableOutputValueColumnNums.length; + for (int i = 0; i < bigTableOutputValueSize; i++) { + final int bigTableOutputValueColumnNum = bigTableOutputValueColumnNums[i]; + ExprNodeColumnDesc colExpr = + new ExprNodeColumnDesc( + mapJoinSignature.get(bigTableOutputValueColumnNum).getType(), + mapJoinOutputNames.get(bigTableOutputValueColumnNum), "", false); + auxiliaryReduceSinkValueExprs.add(colExpr); + auxiliaryValueOutputColumnNames.add("_col" + i); + } + + ReduceSinkDesc auxiliaryReduceSinkDesc = + PlanUtils.getReduceSinkDesc( + auxiliaryReduceSinkKeyExprs, + auxiliaryReduceSinkValueExprs, + auxiliaryValueOutputColumnNames, + false, -1, 0, 1, Operation.NOT_ACID); + auxiliaryReduceSinkDesc.setPartitionCols(new ArrayList()); + auxiliaryReduceSinkDesc.setNumReducers(1); + auxiliaryReduceSinkDesc.setOutputName("auxiliaryIntersect"); + + ReduceSinkOperator auxiliaryReduceSink = + (ReduceSinkOperator) OperatorFactory.getAndMakeChild( + auxiliaryReduceSinkDesc, + new
RowSchema(mapJoinOp.getSchema()), + mapJoinOp); + auxiliaryReduceSink.setColumnExprMap(new HashMap()); + + return auxiliaryReduceSink; + } + + /* + * Add a SELECT as the root of the FULL OUTER INTERSECT Reducer to rename the Reduce-Shuffle + * column names to the ones needed by FULL OUTER INTERSECT MapJoin. + */ + private static SelectOperator addRenameSelect( + ReduceSinkOperator auxiliaryReduceSink, + MapJoinBigTableInfo mapJoinBigTableInfo) + throws SemanticException { + + ReduceSinkDesc auxiliaryReduceSinkDesc = auxiliaryReduceSink.getConf(); + + // A rename SELECT that maps column names... + Map renameSelectColNameToExprMap = new HashMap(); + + // Order these maps by input column number. + Map renameSelectColNumToExprMap = new TreeMap(); + Map renameSelectColNumToOutputNameMap = new TreeMap(); + + /* + * Keys. + */ + ArrayList auxiliaryReduceSinkKeyCols = auxiliaryReduceSinkDesc.getKeyCols(); + List auxiliaryReduceSinkOutputKeyColumnNames = + auxiliaryReduceSinkDesc.getOutputKeyColumnNames(); + int[] bigTableInputKeyColumnMap = mapJoinBigTableInfo.getInputKeyColumnMap(); + String[] bigTableInputKeyColumnNames = mapJoinBigTableInfo.getInputKeyColumnNames(); + final int renameKeySize = auxiliaryReduceSinkKeyCols.size(); + int columnNum = 0; + String keyPrefix = Utilities.ReduceField.KEY.name() + "."; + for (int i = 0; i < renameKeySize; i++) { + String inputColumnName = keyPrefix + auxiliaryReduceSinkOutputKeyColumnNames.get(i); + ExprNodeColumnDesc keyColExpr = (ExprNodeColumnDesc) auxiliaryReduceSinkKeyCols.get(i); + ExprNodeColumnDesc replaceColExpr = + new ExprNodeColumnDesc( + keyColExpr.getTypeInfo(), + inputColumnName, "", false); + renameSelectColNameToExprMap.put(inputColumnName, replaceColExpr); + + final int inputColumnNum = bigTableInputKeyColumnMap[i]; + renameSelectColNumToExprMap.put(inputColumnNum, replaceColExpr); + final String outputColumnName = bigTableInputKeyColumnNames[i]; + renameSelectColNumToOutputNameMap.put(inputColumnNum, outputColumnName); + } + + /* + * Values. 
+ */ + ArrayList auxiliaryReduceSinkValueCols = auxiliaryReduceSinkDesc.getValueCols(); + List auxiliaryReduceSinkOutputValueColumnNames = + auxiliaryReduceSinkDesc.getOutputValueColumnNames(); + int[] bigTableInputValueColumnMap = mapJoinBigTableInfo.getInputValueColumnMap(); + String[] bigTableInputValueColumnNames = mapJoinBigTableInfo.getInputValueColumnNames(); + final int renameValueSize = auxiliaryReduceSinkValueCols.size(); + String valuePrefix = Utilities.ReduceField.VALUE.name() + "."; + for (int i = 0; i < renameValueSize; i++) { + String inputColumnName = valuePrefix + auxiliaryReduceSinkOutputValueColumnNames.get(i); + ExprNodeColumnDesc valueColExpr = (ExprNodeColumnDesc) auxiliaryReduceSinkValueCols.get(i); + ExprNodeColumnDesc replaceColExpr = + new ExprNodeColumnDesc( + valueColExpr.getTypeInfo(), + inputColumnName, "", false); + renameSelectColNameToExprMap.put(inputColumnName, replaceColExpr); + + final int inputColumnNum = bigTableInputValueColumnMap[i]; + renameSelectColNumToExprMap.put(inputColumnNum, replaceColExpr); + final String outputColumnName = bigTableInputValueColumnNames[i]; + renameSelectColNumToOutputNameMap.put(inputColumnNum, outputColumnName); + } + + List renameSelectColExprs = new ArrayList(); + renameSelectColExprs.addAll(renameSelectColNumToExprMap.values()); + + List renameSelectOutputColumnNames = new ArrayList(); + renameSelectOutputColumnNames.addAll(renameSelectColNumToOutputNameMap.values()); + + ArrayList renameSelectColumnInfo = new ArrayList(); + + final int renameSelectSize = renameSelectColExprs.size(); + for (int i = 0; i < renameSelectSize; i++) { + String outputColumnName = renameSelectOutputColumnNames.get(i); + ColumnInfo colInfo = + new ColumnInfo( + outputColumnName, + renameSelectColExprs.get(i).getTypeInfo(), + "", false); + renameSelectColumnInfo.add(colInfo); + } + + SelectDesc renameSelectDesc = + new SelectDesc( + renameSelectColExprs, + renameSelectOutputColumnNames); + + SelectOperator renameSelect = + (SelectOperator) OperatorFactory.get( + auxiliaryReduceSink.getCompilationOpContext(), + renameSelectDesc); + renameSelect.setSchema(new RowSchema(renameSelectColumnInfo)); + renameSelect.setColumnExprMap(renameSelectColNameToExprMap); + + return renameSelect; + } + + public static void generateSharedMemoryPlan(MapJoinOperator mapJoinOp) + throws SemanticException { + + MapJoinDesc mapJoinDesc = mapJoinOp.getConf(); + + int posBigTable = mapJoinDesc.getPosBigTable(); + int posSmallTable = (posBigTable == 0 ? 1 : 0); + + /* + * Add 2nd ReduceSink for sending Small Table to FULL OUTER INTERSECT MapJoin, too. + */ + ReduceSinkOperator intersectSmallTableReduceSink = + addIntersectSmallTableReduceSink(mapJoinOp, posSmallTable); + + /* + * Create FULL OUTER INTERSECT MapJoin. + */ + + // Get ready to set the FULL OUTER INTERSECT MapJoin parents. + List> intersectMapJoinParents = + new ArrayList>(); + intersectMapJoinParents.add(null); + intersectMapJoinParents.add(null); + + MapJoinOperator intersectMapJoinOp = + createIntersectMapJoin( + intersectSmallTableReduceSink, + intersectMapJoinParents, + mapJoinOp, mapJoinDesc, posSmallTable); + + /* + * Create auxiliary ReduceSink that sends first-time key matches from FULL OUTER MapJoin to + * FULL OUTER INTERSECT MapJoin. 
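The rename SELECT built by addRenameSelect exists only to translate the reduce-shuffle namespace (KEY._col0, VALUE._col0, and so on) back into the column names the intersect MapJoin was compiled against, ordered by big-table input column number via TreeMaps. A stripped-down model of that translation follows; the concrete column names in it are invented.

import java.util.LinkedHashMap;
import java.util.Map;
import java.util.TreeMap;

public class RenameSelectSketch {
  public static void main(String[] args) {
    // Shuffle-side names per big-table input column number (TreeMap keeps column order).
    Map<Integer, String> shuffleNameByInputCol = new TreeMap<>();
    shuffleNameByInputCol.put(0, "KEY._col0");
    shuffleNameByInputCol.put(1, "VALUE._col0");
    shuffleNameByInputCol.put(2, "VALUE._col1");

    // Names the intersect MapJoin expects for the same input columns (invented).
    Map<Integer, String> expectedNameByInputCol = new TreeMap<>();
    expectedNameByInputCol.put(0, "key_col");
    expectedNameByInputCol.put(1, "val_a");
    expectedNameByInputCol.put(2, "val_b");

    // The SELECT projects each shuffle column under its expected output name.
    Map<String, String> projection = new LinkedHashMap<>();
    for (int col : shuffleNameByInputCol.keySet()) {
      projection.put(expectedNameByInputCol.get(col), shuffleNameByInputCol.get(col));
    }
    projection.forEach((out, in) -> System.out.println(out + " <- " + in));
  }
}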
+ */ + MapJoinBigTableInfo mapJoinBigTableInfo = + VectorMapJoinBaseOperator.getBigTableInfo(mapJoinDesc); + + ReduceSinkOperator auxiliaryReduceSink = + addAuxiliaryReduceSink(mapJoinOp, mapJoinDesc, mapJoinBigTableInfo); + + ReduceSinkDesc auxiliaryReduceSinkDesc = auxiliaryReduceSink.getConf(); + + /* + * Add a SELECT as the root of the FULL OUTER INTERSECT Reducer to rename the Reduce-Shuffle + * column names to the ones needed by FULL OUTER INTERSECT MapJoin. + */ + SelectOperator renameSelect = + addRenameSelect( + auxiliaryReduceSink, + mapJoinBigTableInfo); + + /* + * Connect the new operators. + */ + auxiliaryReduceSink.getChildOperators().add(renameSelect); + renameSelect.getParentOperators().add(auxiliaryReduceSink); + + renameSelect.getChildOperators().add(intersectMapJoinOp); + + intersectMapJoinParents.set(posBigTable, renameSelect); + intersectMapJoinOp.setParentOperators(intersectMapJoinParents); + + /* + * Insert the special UNION operator to combine the output of the FULL OUTER MapJoin and + * FULL OUTER INTERSECT MapJoin operators. + */ + + // Detach the child below the MapJoin. + Operator mapJoinChild = mapJoinOp.getChildOperators().get(0); + mapJoinOp.setChildOperators(new ArrayList>()); + mapJoinChild.setParentOperators(new ArrayList>()); + + ArrayList> unionParents = + new ArrayList>(); + unionParents.add(mapJoinOp); + unionParents.add(intersectMapJoinOp); + + UnionOperator unionOp = + (UnionOperator) OperatorFactory.getAndMakeChild( + auxiliaryReduceSink.getCompilationOpContext(), + new UnionDesc(), + new RowSchema(mapJoinOp.getSchema().getSignature()), + unionParents); + + unionOp.getChildOperators().add(mapJoinChild); + mapJoinChild.getParentOperators().add(unionOp); + + mapJoinOp.getChildOperators().add(auxiliaryReduceSink); + } + + public static void removeFilterMap(MapJoinDesc mapJoinDesc) throws SemanticException { + int[][] filterMaps = mapJoinDesc.getFilterMap(); + if (filterMaps == null) { + return; + } + final byte posBigTable = (byte) mapJoinDesc.getPosBigTable(); + final int numAliases = mapJoinDesc.getExprs().size(); + List valueFilteredTblDescs = mapJoinDesc.getValueFilteredTblDescs(); + for (byte pos = 0; pos < numAliases; pos++) { + if (pos != posBigTable) { + int[] filterMap = filterMaps[pos]; + TableDesc tableDesc = valueFilteredTblDescs.get(pos); + Properties properties = tableDesc.getProperties(); + String columnNameProperty = properties.getProperty(serdeConstants.LIST_COLUMNS); + String columnNameDelimiter = + properties.containsKey(serdeConstants.COLUMN_NAME_DELIMITER) ?
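Every edge created or removed in generateSharedMemoryPlan is double-entry bookkeeping: a connection exists only when the parent's child list and the child's parent list both record it, which is why the old MapJoin child is detached on both sides before the UNION is spliced in. A toy DAG demonstrating the same splice follows; the Op class is an invented stand-in, not Hive's Operator.

import java.util.ArrayList;
import java.util.List;

public class UnionSpliceSketch {
  static class Op {
    final String name;
    final List<Op> parents = new ArrayList<>();
    final List<Op> children = new ArrayList<>();
    Op(String name) { this.name = name; }
    static void connect(Op parent, Op child) {   // double-entry edge
      parent.children.add(child);
      child.parents.add(parent);
    }
  }

  public static void main(String[] args) {
    Op mapJoin = new Op("MAPJOIN"), intersect = new Op("INTERSECT_MAPJOIN");
    Op child = new Op("DOWNSTREAM");
    Op.connect(mapJoin, child);

    // Detach the old edge on BOTH sides, as the rewrite does.
    mapJoin.children.remove(child);
    child.parents.remove(mapJoin);

    // Splice: both map joins feed a UNION, which feeds the old child.
    Op union = new Op("UNION");
    Op.connect(mapJoin, union);
    Op.connect(intersect, union);
    Op.connect(union, child);

    System.out.println(child.parents.get(0).name);          // UNION
    System.out.println(union.parents.size() + " parents");  // 2 parents
  }
}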
+ properties.getProperty(serdeConstants.COLUMN_NAME_DELIMITER) : + String.valueOf(SerDeUtils.COMMA); + + String columnTypeProperty = properties.getProperty(serdeConstants.LIST_COLUMN_TYPES); + List columnNameList; + if (columnNameProperty.length() == 0) { + columnNameList = new ArrayList(); + } else { + columnNameList = Arrays.asList(columnNameProperty.split(columnNameDelimiter)); + } + List truncatedColumnNameList = columnNameList.subList(0, columnNameList.size() - 1); + String truncatedColumnNameProperty = + Joiner.on(columnNameDelimiter).join(truncatedColumnNameList); + + List columnTypeList; + if (columnTypeProperty.length() == 0) { + columnTypeList = new ArrayList(); + } else { + columnTypeList = TypeInfoUtils + .getTypeInfosFromTypeString(columnTypeProperty); + } + if (!columnTypeList.get(columnTypeList.size() - 1).equals(TypeInfoFactory.shortTypeInfo)) { + throw new SemanticException("Expecting filterTag smallint as last column type"); + } + List truncatedColumnTypeList = + columnTypeList.subList(0, columnTypeList.size() - 1); + String truncatedColumnTypeProperty = + Joiner.on(",").join(truncatedColumnTypeList); + + properties.setProperty(serdeConstants.LIST_COLUMNS, truncatedColumnNameProperty); + properties.setProperty(serdeConstants.LIST_COLUMN_TYPES, truncatedColumnTypeProperty); + } + } + mapJoinDesc.setFilterMap(null); + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java index 275a31f..1303591 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java @@ -356,6 +356,39 @@ public MapJoinOperator convertMapJoin(HiveConf conf, return mapJoinOp; } + public static boolean determineEnableFullOuterMapJoin(HiveConf hiveConf, JoinDesc joinDesc) { + if (joinDesc.getConds().length > 1) { + + // No multiple condition FULL OUTER MapJoin. + return false; + }; + if (HiveConf.getVar( + hiveConf, + HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("mr")) { + + // Only Tez, Spark, etc. + return false; + } + if (HiveConf.getBoolVar( + hiveConf, + HiveConf.ConfVars.HIVEUSEHYBRIDGRACEHASHJOIN)) { + return false; + } + if (!HiveConf.getBoolVar( + hiveConf, + HiveConf.ConfVars.HIVEMAPJOINFULLOUER)) { + return false; + } + if (HiveConf.getBoolVar( + hiveConf, + HiveConf.ConfVars.HIVE_TEST_MAPJOINFULLOUER_OVERRIDE)) { + + // Ignore the HIVEMAPJOINFULLOUER setting. + return false; + } + return true; + } + public static MapJoinOperator convertJoinOpMapJoinOp(HiveConf hconf, JoinOperator op, boolean leftInputJoin, String[] baseSrc, List mapAliases, int mapJoinPos, boolean noCheckOuterJoin) throws SemanticException { @@ -502,6 +535,11 @@ public MapJoinOperator generateMapJoinOperator(ParseContext pctx, JoinOperator o * @return set of big table candidates */ public static Set getBigTableCandidates(JoinCondDesc[] condns) { + return getBigTableCandidates(condns, false); + } + + public static Set getBigTableCandidates(JoinCondDesc[] condns, + boolean isEnableFullOuterJoin) { Set bigTableCandidates = new HashSet(); boolean seenOuterJoin = false; @@ -516,14 +554,18 @@ public MapJoinOperator generateMapJoinOperator(ParseContext pctx, JoinOperator o seenPostitions.add(condn.getRight()); if (joinType == JoinDesc.FULL_OUTER_JOIN) { - // setting these 2 parameters here just in case that if the code got - // changed in future, these 2 are not missing. + // UNDONE: For now, pretend it is a LEFT OUTER JOIN... 
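The removeFilterMap method completed above is essentially "drop the trailing filter-tag column" applied to two parallel comma-joined serde properties, guarded by a check that the last type really is the smallint tag. The same trim in isolation; the property keys below match the common serde constant values, but the rest is illustrative.

import java.util.Arrays;
import java.util.List;
import java.util.Properties;

public class FilterTagTrimSketch {
  public static void main(String[] args) {
    Properties props = new Properties();
    props.setProperty("columns", "k,v,_filtertag");
    props.setProperty("columns.types", "bigint,string,smallint");

    List<String> names = Arrays.asList(props.getProperty("columns").split(","));
    List<String> types = Arrays.asList(props.getProperty("columns.types").split(","));

    // Guard mirrors the patch: the filter tag must be the trailing smallint.
    if (!"smallint".equals(types.get(types.size() - 1))) {
      throw new IllegalStateException("Expecting filterTag smallint as last column type");
    }

    // Drop the last entry from both parallel lists and write them back.
    props.setProperty("columns", String.join(",", names.subList(0, names.size() - 1)));
    props.setProperty("columns.types", String.join(",", types.subList(0, types.size() - 1)));

    System.out.println(props.getProperty("columns"));        // k,v
    System.out.println(props.getProperty("columns.types"));  // bigint,string
  }
}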
seenOuterJoin = true; lastSeenRightOuterJoin = false; - // empty set - cannot convert - return new HashSet(); - } else if (joinType == JoinDesc.LEFT_OUTER_JOIN - || joinType == JoinDesc.LEFT_SEMI_JOIN) { + if (!isEnableFullOuterJoin) { + // Empty set - cannot convert + return new HashSet(); + } + if(bigTableCandidates.size() == 0) { + bigTableCandidates.add(condn.getLeft()); + } + } else if (joinType == JoinDesc.LEFT_OUTER_JOIN || + joinType == JoinDesc.LEFT_SEMI_JOIN) { seenOuterJoin = true; if(bigTableCandidates.size() == 0) { bigTableCandidates.add(condn.getLeft()); @@ -1044,6 +1086,7 @@ public static MapJoinDesc getMapJoinDesc(HiveConf hconf, JoinCondDesc[] condns = desc.getConds(); Byte[] tagOrder = desc.getTagOrder(); + // UNDONE: Fix this comment... // outer join cannot be performed on a table which is being cached if (!noCheckOuterJoin) { if (checkMapJoin(mapJoinPos, condns) < 0) { diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index 13a2fc4..3012296 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -67,6 +67,12 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterLongOperator; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterMultiKeyOperator; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinOuterStringOperator; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinFullOuterLongOperator; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinFullOuterMultiKeyOperator; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinFullOuterStringOperator; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinFullOuterIntersectLongOperator; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinFullOuterIntersectMultiKeyOperator; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.VectorMapJoinFullOuterIntersectStringOperator; import org.apache.hadoop.hive.ql.exec.vector.ptf.VectorPTFOperator; import org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkEmptyKeyOperator; import org.apache.hadoop.hive.ql.exec.vector.reducesink.VectorReduceSinkLongOperator; @@ -121,6 +127,8 @@ import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.MapWork; import org.apache.hadoop.hive.ql.plan.MapredWork; +import org.apache.hadoop.hive.ql.plan.MergeJoinWork; +import org.apache.hadoop.hive.ql.plan.OpTraits; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import org.apache.hadoop.hive.ql.plan.PTFDesc; import org.apache.hadoop.hive.ql.plan.SelectDesc; @@ -709,10 +717,61 @@ public VectorDesc getVectorDesc() { } } - private List> newOperatorList() { + private static List> newOperatorList() { return new ArrayList>(); } + public static void debugDisplayJoinOperatorTree(Operator joinOperator, + String prefix) { + List> currentParentList = newOperatorList(); + currentParentList.add(joinOperator); + + int depth = 0; + do { + List> nextParentList = newOperatorList(); + + final int count = currentParentList.size(); + for (int i = 0; i < count; i++) { + Operator parent = currentParentList.get(i); + System.out.println(prefix + " parent depth " + depth + " " + parent.getClass().getSimpleName() + " " + parent.toString()); + + List> parentList = parent.getParentOperators(); + if (parentList == null || parentList.size() == 0) { + 
continue; + } + + nextParentList.addAll(parentList); + } + + currentParentList = nextParentList; + depth--; + } while (currentParentList.size() > 0); + + List> currentChildList = newOperatorList(); + currentChildList.addAll(joinOperator.getChildOperators()); + + depth = 1; + do { + List> nextChildList = newOperatorList(); + + final int count = currentChildList.size(); + for (int i = 0; i < count; i++) { + Operator child = currentChildList.get(i); + System.out.println(prefix + " child depth " + depth + " " + child.getClass().getSimpleName() + " " + child.toString()); + + List> childList = child.getChildOperators(); + if (childList == null || childList.size() == 0) { + continue; + } + + nextChildList.addAll(childList); + } + + currentChildList = nextChildList; + depth++; + } while (currentChildList.size() > 0); + } + private Operator validateAndVectorizeOperatorTree( Operator nonVecRootOperator, boolean isReduce, boolean isTezOrSpark, @@ -946,6 +1005,11 @@ public Object dispatch(Node nd, Stack stack, Object... nodeOutputs) if (isReduceVectorizationEnabled) { convertReduceWork(reduceWork); } + } else if (baseWork instanceof MergeJoinWork) { + MergeJoinWork mergeJoinWork = (MergeJoinWork) baseWork; + + // MergeJoinExplainVectorization will say vectorization not supported. + mergeJoinWork.setVectorizationExamined(true); + } } } else if (currTask instanceof SparkTask) { @@ -2928,7 +2992,7 @@ private boolean isBigTableOnlyResults(MapJoinDesc desc) { HashTableImplementationType hashTableImplementationType = HashTableImplementationType.NONE; HashTableKind hashTableKind = HashTableKind.NONE; HashTableKeyType hashTableKeyType = HashTableKeyType.NONE; - VectorMapJoinVariation vectorMapJoinVariation = VectorMapJoinVariation.NONE; + VectorMapJoinVariation vectorMapJoinVariation = null; if (vectorDesc.getIsFastHashTableEnabled()) { hashTableImplementationType = HashTableImplementationType.FAST; @@ -2998,6 +3062,10 @@ private boolean isBigTableOnlyResults(MapJoinDesc desc) { vectorMapJoinVariation = VectorMapJoinVariation.OUTER; hashTableKind = HashTableKind.HASH_MAP; break; + case JoinDesc.FULL_OUTER_JOIN: + vectorMapJoinVariation = VectorMapJoinVariation.FULL_OUTER; + hashTableKind = HashTableKind.HASH_MAP; + break; case JoinDesc.LEFT_SEMI_JOIN: vectorMapJoinVariation = VectorMapJoinVariation.LEFT_SEMI; hashTableKind = HashTableKind.HASH_SET; @@ -3027,6 +3095,13 @@ private boolean isBigTableOnlyResults(MapJoinDesc desc) { case OUTER: opClass = VectorMapJoinOuterLongOperator.class; break; + case FULL_OUTER: + if (desc.isFullOuterIntersect()) { + opClass = VectorMapJoinFullOuterIntersectLongOperator.class; + } else { + opClass = VectorMapJoinFullOuterLongOperator.class; + } + break; default: throw new HiveException("Unknown operator variation " + vectorMapJoinVariation); } @@ -3045,6 +3120,13 @@ private boolean isBigTableOnlyResults(MapJoinDesc desc) { case OUTER: opClass = VectorMapJoinOuterStringOperator.class; break; + case FULL_OUTER: + if (desc.isFullOuterIntersect()) { + opClass = VectorMapJoinFullOuterIntersectStringOperator.class; + } else { + opClass = VectorMapJoinFullOuterStringOperator.class; + } + break; default: throw new HiveException("Unknown operator variation " + vectorMapJoinVariation); } @@ -3063,6 +3145,13 @@ private boolean isBigTableOnlyResults(MapJoinDesc desc) { case OUTER: opClass = VectorMapJoinOuterMultiKeyOperator.class; break; + case FULL_OUTER: + if (desc.isFullOuterIntersect()) { + opClass = VectorMapJoinFullOuterIntersectMultiKeyOperator.class; + } else { + opClass = 
VectorMapJoinFullOuterMultiKeyOperator.class; + } + break; default: throw new HiveException("Unknown operator variation " + vectorMapJoinVariation); } @@ -3078,6 +3167,11 @@ private boolean isBigTableOnlyResults(MapJoinDesc desc) { vectorDesc.setHashTableKind(hashTableKind); vectorDesc.setHashTableKeyType(hashTableKeyType); vectorDesc.setVectorMapJoinVariation(vectorMapJoinVariation); + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER) { + + // UNDONE: Not needed for SHARED-MEMORY Non-INTERSECT. + vectorDesc.setIsSaveNullKeyValuesForFullOuter(true); + } vectorDesc.setMinMaxEnabled(minMaxEnabled); vectorDesc.setVectorMapJoinInfo(vectorMapJoinInfo); @@ -3190,6 +3284,8 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi /* * Similarly, we need a mapping since a value expression can be a calculation and the value * will go into a scratch column. + * + * Value expressions include keys? YES. */ int[] bigTableValueColumnMap = new int[allBigTableValueExpressions.length]; String[] bigTableValueColumnNames = new String[allBigTableValueExpressions.length]; @@ -3229,18 +3325,24 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi vectorDesc.setAllBigTableValueExpressions(allBigTableValueExpressions); /* - * Small table information. + * Column mapping. */ - VectorColumnOutputMapping bigTableRetainedMapping = - new VectorColumnOutputMapping("Big Table Retained Mapping"); + VectorColumnOutputMapping bigTableRetainMapping = + new VectorColumnOutputMapping("Big Table Retain Mapping"); + + VectorColumnOutputMapping nonOuterSmallTableKeyMapping = + new VectorColumnOutputMapping("Non Outer Small Table Key Mapping"); + + VectorColumnOutputMapping outerSmallTableKeyMapping = + new VectorColumnOutputMapping("Outer Small Table Key Mapping"); - VectorColumnOutputMapping bigTableOuterKeyMapping = - new VectorColumnOutputMapping("Big Table Outer Key Mapping"); + VectorColumnSourceMapping fullOuterSmallTableKeyMapping = + new VectorColumnSourceMapping("Full Outer Small Table Key Mapping"); // The order of the fields in the LazyBinary small table value must be used, so // we use the source ordering flavor for the mapping. - VectorColumnSourceMapping smallTableMapping = - new VectorColumnSourceMapping("Small Table Mapping"); + VectorColumnSourceMapping smallTableValueMapping = + new VectorColumnSourceMapping("Small Table Value Mapping"); Byte[] order = desc.getTagOrder(); Byte posSingleVectorMapJoinSmallTable = (order[0] == posBigTable ? order[1] : order[0]); @@ -3250,7 +3352,6 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi * Gather up big and small table output result information from the MapJoinDesc. */ List bigTableRetainList = desc.getRetainList().get(posBigTable); - int bigTableRetainSize = bigTableRetainList.size(); int[] smallTableIndices; int smallTableIndicesSize; @@ -3287,6 +3388,8 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi VectorColumnSourceMapping projectionMapping = new VectorColumnSourceMapping("Projection Mapping"); int nextOutputColumn = (order[0] == posBigTable ? 0 : smallTableResultSize); + + final int bigTableRetainSize = bigTableRetainList.size(); for (int i = 0; i < bigTableRetainSize; i++) { // Since bigTableValueExpressions may do a calculation and produce a scratch column, we @@ -3300,9 +3403,10 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi projectionMapping.add(nextOutputColumn, batchColumnIndex, typeInfo); // Collect columns we copy from the big table batch to the overflow batch. 
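This hunk splits the old single "retained" mapping into purpose-specific mappings, but each is still a list of (source column, output column, type) triples that is frozen into plain arrays once planning finishes. A minimal stand-in for that structure follows; ColumnMapping and its methods are invented names that only echo the VectorColumnOutputMapping API.

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

public class ColumnMappingSketch {
  record Entry(int sourceColumn, int outputColumn, String typeName) {}

  static class ColumnMapping {
    final String name;
    private final List<Entry> entries = new ArrayList<>();
    ColumnMapping(String name) { this.name = name; }
    void add(int source, int output, String type) {
      entries.add(new Entry(source, output, type));
    }
    boolean containsOutputColumn(int output) {
      return entries.stream().anyMatch(e -> e.outputColumn() == output);
    }
    int[] getOutputColumns() {  // the "finalize" step: dynamic list to simple array
      return entries.stream().mapToInt(Entry::outputColumn).toArray();
    }
  }

  public static void main(String[] args) {
    ColumnMapping bigTableRetain = new ColumnMapping("Big Table Retain Mapping");
    bigTableRetain.add(2, 2, "bigint");
    if (!bigTableRetain.containsOutputColumn(2)) {  // tolerate repeated use of a column
      bigTableRetain.add(2, 2, "bigint");
    }
    System.out.println(bigTableRetain.name + " -> "
        + Arrays.toString(bigTableRetain.getOutputColumns()));  // [2]
  }
}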
- if (!bigTableRetainedMapping.containsOutputColumn(batchColumnIndex)) { + if (!bigTableRetainMapping.containsOutputColumn(batchColumnIndex)) { + + // Tolerate repeated use of a big table column. - bigTableRetainedMapping.add(batchColumnIndex, batchColumnIndex, typeInfo); + bigTableRetainMapping.add(batchColumnIndex, batchColumnIndex, typeInfo); } nextOutputColumn++; @@ -3319,10 +3423,8 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi nextOutputColumn = firstSmallTableOutputColumn; // Small table indices has more information (i.e. keys) than retain, so use it if it exists... - String[] bigTableRetainedNames; if (smallTableIndicesSize > 0) { smallTableOutputCount = smallTableIndicesSize; - bigTableRetainedNames = new String[smallTableOutputCount]; for (int i = 0; i < smallTableIndicesSize; i++) { if (smallTableIndices[i] >= 0) { @@ -3334,34 +3436,39 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi // Since bigTableKeyExpressions may do a calculation and produce a scratch column, we // need to map the right column. - int batchKeyColumn = bigTableKeyColumnMap[keyIndex]; - bigTableRetainedNames[i] = bigTableKeyColumnNames[keyIndex]; + int bigTableKeyColumn = bigTableKeyColumnMap[keyIndex]; TypeInfo typeInfo = bigTableKeyTypeInfos[keyIndex]; if (!isOuterJoin) { // Optimize inner join keys of small table results. + // UNDONE: The columns seem backwards here... // Project the big table key into the small table result "area". - projectionMapping.add(nextOutputColumn, batchKeyColumn, typeInfo); + projectionMapping.add(nextOutputColumn, bigTableKeyColumn, typeInfo); + + if (!bigTableRetainMapping.containsOutputColumn(bigTableKeyColumn)) { - if (!bigTableRetainedMapping.containsOutputColumn(batchKeyColumn)) { - // If necessary, copy the big table key into the overflow batch's small table - // result "area". - bigTableRetainedMapping.add(batchKeyColumn, batchKeyColumn, typeInfo); + // When the Big Key is not retained in the output result, we do need to copy the + // Big Table key into the overflow batch so the projection of it (Big Table key) to + // the Small Table key will work properly... + // + nonOuterSmallTableKeyMapping.add(bigTableKeyColumn, bigTableKeyColumn, typeInfo); } } else { - // For outer joins, since the small table key can be null when there is no match, + // For outer joins, since the small table key can be null when there is a NOMATCH, + // we must have a physical (scratch) column for those keys. We cannot use the - // projection optimization used by inner joins above. + // projection optimization used by non-[FULL] OUTER joins above. int scratchColumn = vContext.allocateScratchColumn(typeInfo); projectionMapping.add(nextOutputColumn, scratchColumn, typeInfo); - bigTableRetainedMapping.add(batchKeyColumn, scratchColumn, typeInfo); + outerSmallTableKeyMapping.add(bigTableKeyColumn, scratchColumn, typeInfo); - bigTableOuterKeyMapping.add(batchKeyColumn, scratchColumn, typeInfo); + // For FULL OUTER MapJoin, we need to be able to deserialize a Small Table key + // into the output result. + fullOuterSmallTableKeyMapping.add(keyIndex, scratchColumn, typeInfo); } } else { @@ -3375,21 +3482,18 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi smallTableExprVectorizes = false; } - bigTableRetainedNames[i] = smallTableExprNode.toString(); - TypeInfo typeInfo = smallTableExprNode.getTypeInfo(); // Make a new big table scratch column for the small table value. 
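The asymmetry called out in the comments above is worth restating: for a non-outer join the small-table key in the output provably equals the big-table key, so the planner can alias the same physical column, while any OUTER variation needs a genuinely writable scratch column that can hold NULL for NOMATCH rows. A toy batch showing the two strategies; all names here are invented.

import java.util.Arrays;

public class ScratchColumnSketch {
  public static void main(String[] args) {
    Long[] bigTableKeyColumn = { 10L, 20L, 30L };
    boolean[] matched = { true, false, true };

    // Inner join: the small-table key output is just an alias (projection) of the key column.
    Long[] innerSmallKeyOutput = bigTableKeyColumn;

    // Outer join: copy into a scratch column, then NULL out the NOMATCH rows.
    Long[] outerScratch = Arrays.copyOf(bigTableKeyColumn, bigTableKeyColumn.length);
    for (int i = 0; i < outerScratch.length; i++) {
      if (!matched[i]) {
        outerScratch[i] = null;   // the small side is NULL when nothing matched
      }
    }

    System.out.println(Arrays.toString(innerSmallKeyOutput)); // [10, 20, 30]
    System.out.println(Arrays.toString(outerScratch));        // [10, null, 30]
  }
}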
int scratchColumn = vContext.allocateScratchColumn(typeInfo); projectionMapping.add(nextOutputColumn, scratchColumn, typeInfo); - smallTableMapping.add(smallTableValueIndex, scratchColumn, typeInfo); + smallTableValueMapping.add(smallTableValueIndex, scratchColumn, typeInfo); } nextOutputColumn++; } } else if (smallTableRetainSize > 0) { smallTableOutputCount = smallTableRetainSize; - bigTableRetainedNames = new String[smallTableOutputCount]; // Only small table values appear in join output result. @@ -3402,21 +3506,24 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi smallTableExprVectorizes = false; } - bigTableRetainedNames[i] = smallTableExprNode.toString(); - // Make a new big table scratch column for the small table value. TypeInfo typeInfo = smallTableExprNode.getTypeInfo(); int scratchColumn = vContext.allocateScratchColumn(typeInfo); projectionMapping.add(nextOutputColumn, scratchColumn, typeInfo); - smallTableMapping.add(smallTableValueIndex, scratchColumn, typeInfo); + smallTableValueMapping.add(smallTableValueIndex, scratchColumn, typeInfo); nextOutputColumn++; } - } else { - bigTableRetainedNames = new String[0]; } + Map> filterExpressions = desc.getFilters(); + VectorExpression[] bigTableFilterExpressions = + vContext.getVectorExpressions( + filterExpressions.get(posBigTable), + VectorExpressionDescriptor.Mode.FILTER); + vectorMapJoinInfo.setBigTableFilterExpressions(bigTableFilterExpressions); + boolean useOptimizedTable = HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEMAPJOINUSEOPTIMIZEDTABLE); @@ -3472,15 +3579,23 @@ private boolean canSpecializeMapJoin(Operator op, MapJoi // Convert dynamic arrays and maps to simple arrays. - bigTableRetainedMapping.finalize(); + bigTableRetainMapping.finalize(); + vectorMapJoinInfo.setBigTableRetainColumnMap(bigTableRetainMapping.getOutputColumns()); + vectorMapJoinInfo.setBigTableRetainTypeInfos(bigTableRetainMapping.getTypeInfos()); - bigTableOuterKeyMapping.finalize(); + nonOuterSmallTableKeyMapping.finalize(); + vectorMapJoinInfo.setNonOuterSmallTableKeyColumnMap(nonOuterSmallTableKeyMapping.getOutputColumns()); + vectorMapJoinInfo.setNonOuterSmallTableKeyTypeInfos(nonOuterSmallTableKeyMapping.getTypeInfos()); - smallTableMapping.finalize(); + outerSmallTableKeyMapping.finalize(); + fullOuterSmallTableKeyMapping.finalize(); - vectorMapJoinInfo.setBigTableRetainedMapping(bigTableRetainedMapping); - vectorMapJoinInfo.setBigTableOuterKeyMapping(bigTableOuterKeyMapping); - vectorMapJoinInfo.setSmallTableMapping(smallTableMapping); + vectorMapJoinInfo.setOuterSmallTableKeyMapping(outerSmallTableKeyMapping); + vectorMapJoinInfo.setFullOuterSmallTableKeyMapping(fullOuterSmallTableKeyMapping); + + smallTableValueMapping.finalize(); + + vectorMapJoinInfo.setSmallTableValueMapping(smallTableValueMapping); projectionMapping.finalize(); @@ -4111,8 +4226,6 @@ private boolean usesVectorUDFAdaptor(VectorExpression[] vecExprs) { return new ImmutablePair, String>(vectorOp, null); } - static int fake; - public static Operator vectorizeSelectOperator( Operator selectOp, VectorizationContext vContext, VectorSelectDesc vectorSelectDesc) diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java index 105ef08..73882d2 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainConfiguration.java @@ -47,6 +47,7 @@ private boolean vectorization = false; private boolean 
vectorizationOnly = false; private VectorizationDetailLevel vectorizationDetailLevel = VectorizationDetailLevel.SUMMARY; + private boolean debug = false; private Path explainRootPath; private Map opIdToRuntimeNumRows; @@ -137,6 +138,14 @@ public void setVectorizationDetailLevel(VectorizationDetailLevel vectorizationDe this.vectorizationDetailLevel = vectorizationDetailLevel; } + public boolean isDebug() { + return debug; + } + + public void setDebug(boolean debug) { + this.debug = debug; + } + public Path getExplainRootPath() { return explainRootPath; } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java index 63b13c8..75ba3af 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ExplainSemanticAnalyzer.java @@ -111,6 +111,8 @@ public void analyzeInternal(ASTNode ast) throws SemanticException { i++; } } + } else if (explainOptions == HiveParser.KW_DEBUG) { + config.setDebug(true); } else { // UNDONE: UNKNOWN OPTION? } diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g index a1ec96c..f70fc67 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g +++ ql/src/java/org/apache/hadoop/hive/ql/parse/HiveLexer.g @@ -187,6 +187,7 @@ KW_FILE: 'FILE'; KW_JAR: 'JAR'; KW_EXPLAIN: 'EXPLAIN'; KW_EXTENDED: 'EXTENDED'; +KW_DEBUG: 'DEBUG'; KW_FORMATTED: 'FORMATTED'; KW_DEPENDENCY: 'DEPENDENCY'; KW_LOGICAL: 'LOGICAL'; diff --git ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g index 3abc752..8a90c8f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g +++ ql/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g @@ -558,6 +558,7 @@ import org.apache.hadoop.hive.conf.HiveConf; xlateMap.put("KW_FUNCTION", "FUNCTION"); xlateMap.put("KW_EXPLAIN", "EXPLAIN"); xlateMap.put("KW_EXTENDED", "EXTENDED"); + xlateMap.put("KW_DEBUG", "DEBUG"); xlateMap.put("KW_SERDE", "SERDE"); xlateMap.put("KW_WITH", "WITH"); xlateMap.put("KW_SERDEPROPERTIES", "SERDEPROPERTIES"); @@ -789,6 +790,7 @@ explainOption | KW_ANALYZE | KW_REOPTIMIZATION | (KW_VECTORIZATION vectorizationOnly? vectorizatonDetail?) 
+ | KW_DEBUG ; vectorizationOnly diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/Explain.java ql/src/java/org/apache/hadoop/hive/ql/plan/Explain.java index da30243..030bb61 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/Explain.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/Explain.java @@ -28,7 +28,7 @@ @Retention(RetentionPolicy.RUNTIME) public @interface Explain { public enum Level { - USER, DEFAULT, EXTENDED; + USER, DEFAULT, EXTENDED, DEBUG; public boolean in(Level[] levels) { for (Level level : levels) { if (level.equals(this)) { diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java index cde7852..a854819 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ExplainWork.java @@ -130,6 +130,10 @@ public VectorizationDetailLevel isVectorizationDetailLevel() { return config.getVectorizationDetailLevel(); } + public boolean isDebug() { + return config.isDebug(); + } + public ParseContext getParseContext() { return pCtx; } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/JoinCondDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/JoinCondDesc.java index ea22131..acbbba9 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/JoinCondDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/JoinCondDesc.java @@ -125,7 +125,7 @@ public String getJoinCondString() { sb.append("Inner Join "); break; case JoinDesc.FULL_OUTER_JOIN: - sb.append("Outer Join "); + sb.append("Full Outer Join "); break; case JoinDesc.LEFT_OUTER_JOIN: sb.append("Left Outer Join "); diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java index 5b7f4c3..1fa97f4 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/JoinDesc.java @@ -560,6 +560,9 @@ public void setFilterMap(int[][] filterMap) { return null; } filterMap = compactFilter(filterMap); + if (filterMap == null) { + return null; + } Map result = new LinkedHashMap(); for (int i = 0 ; i < filterMap.length; i++) { if (filterMap[i] == null) { diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java index 91ea159..8e0ce22 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java @@ -33,11 +33,18 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.MemoryMonitorInfo; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe; import org.apache.hadoop.hive.ql.optimizer.signature.Signature; import org.apache.hadoop.hive.ql.plan.Explain.Level; import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation; +import org.apache.hadoop.hive.serde2.AbstractSerDe; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.SerDeUtils; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hive.common.util.ReflectionUtil; /** * Map Join operator Descriptor implementation. 
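The lexer, parser, ExplainConfiguration, and Explain.Level changes above together introduce an EXPLAIN DEBUG mode; DEBUG-only detail, such as the keyExpressions and keyContext getters on MapJoinDesc below, is opted in per method through the annotation's level list. A toy version of that level-filtered rendering follows; the Show annotation and Desc getters are invented, and Hive's actual level semantics are richer than this exact-match filter.

import java.lang.annotation.Retention;
import java.lang.annotation.RetentionPolicy;
import java.lang.reflect.Method;
import java.util.Arrays;

public class ExplainLevelSketch {
  enum Level { USER, DEFAULT, EXTENDED, DEBUG }

  @Retention(RetentionPolicy.RUNTIME)
  @interface Show { Level[] levels(); }

  static class Desc {
    @Show(levels = { Level.DEFAULT, Level.EXTENDED })
    public String condMap() { return "0:FULL OUTER"; }

    @Show(levels = { Level.DEBUG })
    public String keyExpressions() { return "{0=[Column[_col0]]}"; }
  }

  static void explain(Object desc, Level level) throws Exception {
    for (Method m : desc.getClass().getDeclaredMethods()) {
      Show show = m.getAnnotation(Show.class);
      // A getter prints only when the requested level appears in its list.
      if (show != null && Arrays.asList(show.levels()).contains(level)) {
        System.out.println(m.getName() + ": " + m.invoke(desc));
      }
    }
  }

  public static void main(String[] args) throws Exception {
    explain(new Desc(), Level.DEFAULT);  // condMap only
    explain(new Desc(), Level.DEBUG);    // keyExpressions only
  }
}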
@@ -82,6 +89,7 @@ private boolean isHybridHashJoin; private boolean isDynamicPartitionHashJoin = false; + private boolean isFullOuterIntersect = false; public MapJoinDesc() { bigTableBucketNumMapping = new LinkedHashMap(); @@ -92,6 +100,7 @@ public MapJoinDesc(MapJoinDesc clone) { this.keys = clone.keys; this.keyTblDesc = clone.keyTblDesc; this.valueTblDescs = clone.valueTblDescs; + this.valueFilteredTblDescs = clone.valueFilteredTblDescs; this.posBigTable = clone.posBigTable; this.valueIndices = clone.valueIndices; this.retainList = clone.retainList; @@ -211,6 +220,15 @@ public void setDumpFilePrefix(String dumpFilePrefix) { this.dumpFilePrefix = dumpFilePrefix; } + @Explain(displayName = "keyExpressions", explainLevels = { Level.DEBUG }) + public Map getKeyExpressionString() { + Map keyMap = new LinkedHashMap(); + for (Map.Entry> k: getKeys().entrySet()) { + keyMap.put(k.getKey(), k.getValue().toString()); + } + return keyMap; + } + /** * @return the keys in string form */ @@ -296,6 +314,58 @@ public void setValueFilteredTblDescs(List valueFilteredTblDescs) { return valueTblDescs; } + @Explain(displayName = "keyContext", explainLevels = { Level.DEBUG }) + public String getDebugKeyContext() { + MapJoinObjectSerDeContext keyContext; + try { + AbstractSerDe keySerde = + (AbstractSerDe) ReflectionUtil.newInstance( + keyTblDesc.getDeserializerClass(), null); + SerDeUtils.initializeSerDe(keySerde, null, keyTblDesc.getProperties(), null); + keyContext = new MapJoinObjectSerDeContext(keySerde, false); + } catch (SerDeException e) { + return null; + } + return keyContext.stringify(); + } + + private boolean hasFilter(int alias, int[][] filterMaps) { + return filterMaps != null && filterMaps[alias] != null; + } + + @Explain(displayName = "valueContexts", explainLevels = { Level.DEBUG }) + public String getDebugValueContext() { + List valueContextStringList = new ArrayList(); + try { + boolean noOuterJoin = getNoOuterJoin(); + // Order in which the results should be output. 
+ Byte[] order = getTagOrder(); + int[][] filterMaps = getFilterMap(); + + for (int pos = 0; pos < order.length; pos++) { + if (pos == posBigTable) { + continue; + } + TableDesc valueTableDesc; + if (noOuterJoin) { + valueTableDesc = getValueTblDescs().get(pos); + } else { + valueTableDesc = getValueFilteredTblDescs().get(pos); + } + AbstractSerDe valueSerDe = + (AbstractSerDe) ReflectionUtil.newInstance( + valueTableDesc.getDeserializerClass(), null); + SerDeUtils.initializeSerDe(valueSerDe, null, valueTableDesc.getProperties(), null); + MapJoinObjectSerDeContext valueContext = + new MapJoinObjectSerDeContext(valueSerDe, hasFilter(pos, filterMaps)); + valueContextStringList.add(pos + ":" + valueContext.stringify()); + } + } catch (SerDeException e) { + return null; + } + return valueContextStringList.toString(); + } + /** * @param valueTblDescs * the valueTblDescs to set @@ -383,6 +453,8 @@ public boolean getGenJoinKeys() { return genJoinKeys; } + @Explain(displayName = "DynamicPartitionHashJoin", explainLevels = { Level.USER, Level.DEFAULT, + Level.EXTENDED }, displayOnlyOnTrue = true) public boolean isDynamicPartitionHashJoin() { return isDynamicPartitionHashJoin; } @@ -391,6 +463,35 @@ public void setDynamicPartitionHashJoin(boolean isDistributedHashJoin) { this.isDynamicPartitionHashJoin = isDistributedHashJoin; } + @Explain(displayName = "fullOuterIntersect", explainLevels = { Level.USER, Level.DEFAULT, + Level.EXTENDED }, displayOnlyOnTrue = true) + public boolean isFullOuterIntersect() { + return isFullOuterIntersect; + } + + public void setFullOuterIntersect(boolean isFullOuterIntersect) { + this.isFullOuterIntersect = isFullOuterIntersect; + } + + // Debug only for OUTER MapJoin. + @Explain(displayName = "outer filter mappings", explainLevels = { Level.DEBUG }) + public String getDebugOuterFilterMapString() { + if (conds.length != 1) { + return null; + } + JoinCondDesc cond = conds[0]; + if (cond.getType() != JoinDesc.FULL_OUTER_JOIN && + cond.getType() != JoinDesc.LEFT_OUTER_JOIN && + cond.getType() != JoinDesc.RIGHT_OUTER_JOIN) { + return null; + } + int[][] fm = getFilterMap(); + if (fm == null) { + return null; + } + return Arrays.deepToString(fm); + } + // Use LinkedHashSet to give predictable display order. private static final Set vectorizableMapJoinNativeEngines = new LinkedHashSet(Arrays.asList("tez", "spark")); @@ -406,7 +507,9 @@ public void setDynamicPartitionHashJoin(boolean isDistributedHashJoin) { public MapJoinOperatorExplainVectorization(MapJoinDesc mapJoinDesc, VectorMapJoinDesc vectorMapJoinDesc) { // VectorMapJoinOperator is not native vectorized. 
- super(vectorMapJoinDesc, vectorMapJoinDesc.getHashTableImplementationType() != HashTableImplementationType.NONE); + super( + vectorMapJoinDesc, + vectorMapJoinDesc.getHashTableImplementationType() != HashTableImplementationType.NONE); this.mapJoinDesc = mapJoinDesc; this.vectorMapJoinDesc = vectorMapJoinDesc; vectorMapJoinInfo = @@ -419,7 +522,8 @@ public MapJoinOperatorExplainVectorization(MapJoinDesc mapJoinDesc, String engine = vectorMapJoinDesc.getEngine(); String engineInSupportedCondName = - HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname + " " + engine + " IN " + vectorizableMapJoinNativeEngines; + HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname + " " + + engine + " IN " + vectorizableMapJoinNativeEngines; boolean engineInSupported = vectorizableMapJoinNativeEngines.contains(engine); boolean isFastHashTableEnabled = vectorMapJoinDesc.getIsFastHashTableEnabled(); @@ -474,7 +578,8 @@ public MapJoinOperatorExplainVectorization(MapJoinDesc mapJoinDesc, return conditions; } - @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeConditionsMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeConditionsMet", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public List getNativeConditionsMet() { if (nativeConditions == null) { nativeConditions = createNativeConditions(); @@ -482,7 +587,8 @@ public MapJoinOperatorExplainVectorization(MapJoinDesc mapJoinDesc, return VectorizationCondition.getConditionsMet(nativeConditions); } - @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeConditionsNotMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeConditionsNotMet", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public List getNativeConditionsNotMet() { if (nativeConditions == null) { nativeConditions = createNativeConditions(); @@ -490,7 +596,8 @@ public MapJoinOperatorExplainVectorization(MapJoinDesc mapJoinDesc, return VectorizationCondition.getConditionsNotMet(nativeConditions); } - @Explain(vectorization = Vectorization.EXPRESSION, displayName = "bigTableKeyExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.OPERATOR, displayName = "bigTableKeyExpressions", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public List getBigTableKeyExpressions() { return vectorExpressionsToStringList( isNative ? 
@@ -498,8 +605,18 @@ public MapJoinOperatorExplainVectorization(MapJoinDesc mapJoinDesc, vectorMapJoinDesc.getAllBigTableKeyExpressions()); } - @Explain(vectorization = Vectorization.DETAIL, displayName = "bigTableKeyColumnNums", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public String getBigTableKeyColumnNums() { + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "hashTableImplementationType", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String hashTableImplementationType() { + if (!isNative) { + return null; + } + return vectorMapJoinDesc.getHashTableImplementationType().name(); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "bigTableKeyColumns", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getBigTableKeyColumns() { if (!isNative) { return null; } @@ -507,10 +624,13 @@ public String getBigTableKeyColumnNums() { if (bigTableKeyColumnMap.length == 0) { return null; } - return Arrays.toString(bigTableKeyColumnMap); + return outputColumnsAndTypesToStringList( + vectorMapJoinInfo.getBigTableKeyColumnMap(), + vectorMapJoinInfo.getBigTableKeyTypeInfos()); } - @Explain(vectorization = Vectorization.EXPRESSION, displayName = "bigTableValueExpressions", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "bigTableValueExpressions", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public List getBigTableValueExpressions() { return vectorExpressionsToStringList( isNative ? @@ -518,8 +638,18 @@ public String getBigTableKeyColumnNums() { vectorMapJoinDesc.getAllBigTableValueExpressions()); } - @Explain(vectorization = Vectorization.DETAIL, displayName = "bigTableValueColumnNums", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public String getBigTableValueColumnNums() { + @Explain(vectorization = Vectorization.EXPRESSION, displayName = "bigTableFilterExpressions", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getBigTableFilterExpressions() { + if (!isNative) { + return null; + } + return vectorExpressionsToStringList(vectorMapJoinInfo.getBigTableFilterExpressions()); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "bigTableValueColumns", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getBigTableValueColumns() { if (!isNative) { return null; } @@ -527,48 +657,78 @@ public String getBigTableValueColumnNums() { if (bigTableValueColumnMap.length == 0) { return null; } - return Arrays.toString(bigTableValueColumnMap); + return outputColumnsAndTypesToStringList( + vectorMapJoinInfo.getBigTableValueColumnMap(), + vectorMapJoinInfo.getBigTableValueTypeInfos()); } - @Explain(vectorization = Vectorization.DETAIL, displayName = "smallTableMapping", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public String getSmallTableColumns() { + @Explain(vectorization = Vectorization.DETAIL, displayName = "smallTableValueMapping", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getSmallTableColumns() { if (!isNative) { return null; } - return outputColumnsToStringList(vectorMapJoinInfo.getSmallTableMapping()); + return outputColumnsAndTypesToStringList(vectorMapJoinInfo.getSmallTableValueMapping()); } - @Explain(vectorization = Vectorization.DETAIL, displayName = "projectedOutputColumnNums", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public String getProjectedOutputColumnNums() { + @Explain(vectorization = Vectorization.DETAIL, displayName = "projectedOutput", + 
explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getProjectedOutputColumnNums() { if (!isNative) { return null; } - return outputColumnsToStringList(vectorMapJoinInfo.getProjectionMapping()); + return outputColumnsAndTypesToStringList(vectorMapJoinInfo.getProjectionMapping()); } - @Explain(vectorization = Vectorization.DETAIL, displayName = "bigTableOuterKeyMapping", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public List getBigTableOuterKey() { - if (!isNative || vectorMapJoinDesc.getVectorMapJoinVariation() != VectorMapJoinVariation.OUTER) { + @Explain(vectorization = Vectorization.DETAIL, displayName = "bigTableRetainColumnNums", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getBigTableRetainedColumnNums() { + if (!isNative) { return null; } - return columnMappingToStringList(vectorMapJoinInfo.getBigTableOuterKeyMapping()); + return Arrays.toString(vectorMapJoinInfo.getBigTableRetainColumnMap()); } - @Explain(vectorization = Vectorization.DETAIL, displayName = "bigTableRetainedColumnNums", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public String getBigTableRetainedColumnNums() { - if (!isNative) { + @Explain(vectorization = Vectorization.DETAIL, displayName = "nonOuterSmallTableKeyMapping", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public String getNonOuterSmallTableKeyMapping() { + if (!isNative || + (vectorMapJoinDesc.getVectorMapJoinVariation() == VectorMapJoinVariation.OUTER || + vectorMapJoinDesc.getVectorMapJoinVariation() == VectorMapJoinVariation.FULL_OUTER)) { + return null; + } + return Arrays.toString(vectorMapJoinInfo.getNonOuterSmallTableKeyColumnMap()); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "outerSmallTableKeyMapping", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getOuterSmallTableKeyMapping() { + if (!isNative || + vectorMapJoinDesc.getVectorMapJoinVariation() != VectorMapJoinVariation.OUTER) { + return null; + } + return columnMappingToStringList(vectorMapJoinInfo.getOuterSmallTableKeyMapping()); + } + + @Explain(vectorization = Vectorization.DETAIL, displayName = "fullOuterSmallTableKeyMapping", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getFullOuterSmallTableKeyMapping() { + if (!isNative || + vectorMapJoinDesc.getVectorMapJoinVariation() != VectorMapJoinVariation.FULL_OUTER) { return null; } - return outputColumnsToStringList(vectorMapJoinInfo.getBigTableRetainedMapping()); + return columnMappingToStringList(vectorMapJoinInfo.getFullOuterSmallTableKeyMapping()); } - @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeNotSupportedKeyTypes", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.OPERATOR, displayName = "nativeNotSupportedKeyTypes", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public List getNativeNotSupportedKeyTypes() { return vectorMapJoinDesc.getNotSupportedKeyTypes(); } } - @Explain(vectorization = Vectorization.OPERATOR, displayName = "Map Join Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.OPERATOR, displayName = "Map Join Vectorization", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public MapJoinOperatorExplainVectorization getMapJoinVectorization() { VectorMapJoinDesc vectorMapJoinDesc = (VectorMapJoinDesc) getVectorDesc(); if (vectorMapJoinDesc == null || this instanceof SMBJoinDesc) { @@ -592,7 +752,8 @@ public SMBJoinOperatorExplainVectorization(SMBJoinDesc 
smbJoinDesc, } // Handle dual nature. - @Explain(vectorization = Vectorization.OPERATOR, displayName = "SMB Map Join Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + @Explain(vectorization = Vectorization.OPERATOR, displayName = "SMB Map Join Vectorization", + explainLevels = { Level.DEFAULT, Level.EXTENDED }) public SMBJoinOperatorExplainVectorization getSMBJoinVectorization() { VectorSMBJoinDesc vectorSMBJoinDesc = (VectorSMBJoinDesc) getVectorDesc(); if (vectorSMBJoinDesc == null || !(this instanceof SMBJoinDesc)) { diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java index b0ae64a..999e52b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MergeJoinWork.java @@ -25,11 +25,15 @@ import java.util.Map.Entry; import java.util.Set; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.CommonMergeJoinOperator; import org.apache.hadoop.hive.ql.exec.HashTableDummyOperator; import org.apache.hadoop.hive.ql.exec.Operator; import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator; +import org.apache.hadoop.hive.ql.plan.BaseWork.BaseExplainVectorization; import org.apache.hadoop.hive.ql.plan.Explain.Level; +import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; +import org.apache.hadoop.hive.ql.plan.ReduceWork.ReduceExplainVectorization; import org.apache.hadoop.mapred.JobConf; public class MergeJoinWork extends BaseWork { @@ -175,8 +179,55 @@ public void setLlapMode(boolean llapMode) { public boolean getLlapMode() { return getMainWork().getLlapMode(); } - + public void addDummyOp(HashTableDummyOperator dummyOp) { getMainWork().addDummyOp(dummyOp); } + public class MergeJoinExplainVectorization extends BaseExplainVectorization { + + private final MergeJoinWork mergeJoinWork; + + private VectorizationCondition[] mergeWorkVectorizationConditions; + + public MergeJoinExplainVectorization(MergeJoinWork mergeJoinWork) { + super(mergeJoinWork); + this.mergeJoinWork = mergeJoinWork; + } + + private VectorizationCondition[] createMergeWorkExplainVectorizationConditions() { + + boolean enabled = false; + + VectorizationCondition[] conditions = new VectorizationCondition[] { + new VectorizationCondition( + enabled, + "Vectorizing MergeJoin Supported") + }; + return conditions; + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "enableConditionsMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getEnableConditionsMet() { + if (mergeWorkVectorizationConditions == null) { + mergeWorkVectorizationConditions = createMergeWorkExplainVectorizationConditions(); + } + return VectorizationCondition.getConditionsMet(mergeWorkVectorizationConditions); + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "enableConditionsNotMet", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getEnableConditionsNotMet() { + if (mergeWorkVectorizationConditions == null) { + mergeWorkVectorizationConditions = createMergeWorkExplainVectorizationConditions(); + } + return VectorizationCondition.getConditionsNotMet(mergeWorkVectorizationConditions); + } + } + + @Explain(vectorization = Vectorization.SUMMARY, displayName = "MergeJoin Vectorization", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public MergeJoinExplainVectorization getReduceExplainVectorization() { + if (!getVectorizationExamined()) { + return null; + } + return new 
MergeJoinExplainVectorization(this); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorExplainVectorization.java ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorExplainVectorization.java index 446b810..5439e14 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorExplainVectorization.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/OperatorExplainVectorization.java @@ -59,6 +59,25 @@ public String outputColumnsToStringList(VectorColumnMapping vectorColumnMapping) return Arrays.toString(outputColumns); } + public List outputColumnsAndTypesToStringList(int[] outputColumns, TypeInfo[] typeInfos) { + final int size = outputColumns.length; + ArrayList result = new ArrayList(size); + for (int i = 0; i < size; i++) { + result.add(outputColumns[i] + ":" + typeInfos[i].toString()); + } + return result; + } + + public List outputColumnsAndTypesToStringList(VectorColumnMapping vectorColumnMapping) { + final int size = vectorColumnMapping.getCount(); + if (size == 0) { + return null; + } + int[] outputColumns = vectorColumnMapping.getOutputColumns(); + TypeInfo[] typeInfos = vectorColumnMapping.getTypeInfos(); + return outputColumnsAndTypesToStringList(outputColumns, typeInfos); + } + public List columnMappingToStringList(VectorColumnMapping vectorColumnMapping) { final int size = vectorColumnMapping.getCount(); if (size == 0) { diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java index f2955af..3044b6f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java @@ -28,6 +28,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.optimizer.signature.Signature; +import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.plan.Explain.Level; import org.apache.hadoop.hive.ql.plan.Explain.Vectorization; import org.apache.hadoop.hive.ql.plan.VectorReduceSinkDesc.ReduceSinkKeyType; @@ -188,6 +189,16 @@ public Object clone() { return outputKeyColumnNames; } + @Explain(displayName = "output key column names", explainLevels = { Level.DEBUG }) + public List getOutputKeyColumnNamesDisplay() { + List result = new ArrayList(); + for (String name : outputKeyColumnNames) { + result.add(Utilities.ReduceField.KEY.name() + "." + name); + } + return result; + } + + public void setOutputKeyColumnNames( java.util.ArrayList outputKeyColumnNames) { this.outputKeyColumnNames = outputKeyColumnNames; @@ -197,6 +208,15 @@ public void setOutputKeyColumnNames( return outputValueColumnNames; } + @Explain(displayName = "output value column names", explainLevels = { Level.DEBUG }) + public List getOutputValueColumnNamesDisplay() { + List result = new ArrayList(); + for (String name : outputValueColumnNames) { + result.add(Utilities.ReduceField.VALUE.name() + "." 
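/* e.g. "VALUE._col0" */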
+ name); + } + return result; + } + public void setOutputValueColumnNames( java.util.ArrayList outputValueColumnNames) { this.outputValueColumnNames = outputValueColumnNames; @@ -432,6 +452,7 @@ public void setSkipTag(boolean value) { this.skipTag = value; } + @Explain(displayName = "skipTag", explainLevels = { Level.DEBUG }) public boolean getSkipTag() { return skipTag; } @@ -536,34 +557,38 @@ public ReduceSinkOperatorExplainVectorization(ReduceSinkDesc reduceSinkDesc, return vectorExpressionsToStringList(vectorReduceSinkInfo.getReduceSinkValueExpressions()); } - @Explain(vectorization = Vectorization.DETAIL, displayName = "keyColumnNums", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public String getKeyColumnNums() { + @Explain(vectorization = Vectorization.DETAIL, displayName = "keyColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getKeyColumns() { if (!isNative) { return null; } int[] keyColumnMap = vectorReduceSinkInfo.getReduceSinkKeyColumnMap(); if (keyColumnMap == null) { // Always show an array. - keyColumnMap = new int[0]; + return new ArrayList(); } - return Arrays.toString(keyColumnMap); + return outputColumnsAndTypesToStringList( + vectorReduceSinkInfo.getReduceSinkKeyColumnMap(), + vectorReduceSinkInfo.getReduceSinkKeyTypeInfos()); } - @Explain(vectorization = Vectorization.DETAIL, displayName = "valueColumnNums", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public String getValueColumnNums() { + @Explain(vectorization = Vectorization.DETAIL, displayName = "valueColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getValueColumns() { if (!isNative) { return null; } int[] valueColumnMap = vectorReduceSinkInfo.getReduceSinkValueColumnMap(); if (valueColumnMap == null) { // Always show an array. - valueColumnMap = new int[0]; + return new ArrayList(); } - return Arrays.toString(valueColumnMap); + return outputColumnsAndTypesToStringList( + vectorReduceSinkInfo.getReduceSinkValueColumnMap(), + vectorReduceSinkInfo.getReduceSinkValueTypeInfos()); } - @Explain(vectorization = Vectorization.DETAIL, displayName = "bucketColumnNums", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public String getBucketColumnNums() { + @Explain(vectorization = Vectorization.DETAIL, displayName = "bucketColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getBucketColumns() { if (!isNative) { return null; } @@ -572,11 +597,13 @@ public String getBucketColumnNums() { // Suppress empty column map. return null; } - return Arrays.toString(bucketColumnMap); + return outputColumnsAndTypesToStringList( + vectorReduceSinkInfo.getReduceSinkBucketColumnMap(), + vectorReduceSinkInfo.getReduceSinkBucketTypeInfos()); } - @Explain(vectorization = Vectorization.DETAIL, displayName = "partitionColumnNums", explainLevels = { Level.DEFAULT, Level.EXTENDED }) - public String getPartitionColumnNums() { + @Explain(vectorization = Vectorization.DETAIL, displayName = "partitionColumns", explainLevels = { Level.DEFAULT, Level.EXTENDED }) + public List getPartitionColumns() { if (!isNative) { return null; } @@ -585,7 +612,9 @@ public String getPartitionColumnNums() { // Suppress empty column map. 
return null; } - return Arrays.toString(partitionColumnMap); + return outputColumnsAndTypesToStringList( + vectorReduceSinkInfo.getReduceSinkPartitionColumnMap(), + vectorReduceSinkInfo.getReduceSinkPartitionTypeInfos()); } private VectorizationCondition[] createNativeConditions() { diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java index 58032ca..a8f045c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinDesc.java @@ -85,11 +85,11 @@ public PrimitiveTypeInfo getPrimitiveTypeInfo() { } public static enum VectorMapJoinVariation { - NONE, - INNER_BIG_ONLY, INNER, + INNER_BIG_ONLY, LEFT_SEMI, - OUTER + OUTER, + FULL_OUTER } private HashTableImplementationType hashTableImplementationType; @@ -107,7 +107,7 @@ public VectorMapJoinDesc() { hashTableImplementationType = HashTableImplementationType.NONE; hashTableKind = HashTableKind.NONE; hashTableKeyType = HashTableKeyType.NONE; - vectorMapJoinVariation = VectorMapJoinVariation.NONE; + vectorMapJoinVariation = null; minMaxEnabled = false; allBigTableKeyExpressions = null; @@ -206,6 +206,7 @@ public VectorMapJoinInfo getVectorMapJoinInfo() { private List notSupportedKeyTypes; private boolean smallTableExprVectorizes; private boolean outerJoinHasNoKeys; + boolean isSaveNullKeyValuesForFullOuter; public void setUseOptimizedTable(boolean useOptimizedTable) { this.useOptimizedTable = useOptimizedTable; @@ -274,5 +275,10 @@ public void setIsHybridHashJoin(boolean isHybridHashJoin) { public boolean getIsHybridHashJoin() { return isHybridHashJoin; } - + public void setIsSaveNullKeyValuesForFullOuter(boolean isSaveNullKeyValuesForFullOuter) { + this.isSaveNullKeyValuesForFullOuter = isSaveNullKeyValuesForFullOuter; + } + public boolean getIsSaveNullKeyValuesForFullOuter() { + return isSaveNullKeyValuesForFullOuter; + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinInfo.java ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinInfo.java index 6db0540..ad82e5c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinInfo.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/VectorMapJoinInfo.java @@ -48,9 +48,19 @@ private TypeInfo[] bigTableValueTypeInfos; private VectorExpression[] slimmedBigTableValueExpressions; - private VectorColumnOutputMapping bigTableRetainedMapping; - private VectorColumnOutputMapping bigTableOuterKeyMapping; - private VectorColumnSourceMapping smallTableMapping; + private VectorExpression[] bigTableFilterExpressions; + + private int[] bigTableRetainColumnMap; + private TypeInfo[] bigTableRetainTypeInfos; + + private int[] nonOuterSmallTableKeyColumnMap; + private TypeInfo[] nonOuterSmallTableKeyTypeInfos; + + private VectorColumnOutputMapping outerSmallTableKeyMapping; + + private VectorColumnSourceMapping fullOuterSmallTableKeyMapping; + + private VectorColumnSourceMapping smallTableValueMapping; private VectorColumnSourceMapping projectionMapping; @@ -65,9 +75,19 @@ public VectorMapJoinInfo() { bigTableValueTypeInfos = null; slimmedBigTableValueExpressions = null; - bigTableRetainedMapping = null; - bigTableOuterKeyMapping = null; - smallTableMapping = null; + bigTableFilterExpressions = null; + + bigTableRetainColumnMap = null; + bigTableRetainTypeInfos = null; + + nonOuterSmallTableKeyColumnMap = null; + nonOuterSmallTableKeyTypeInfos = null; + + outerSmallTableKeyMapping = null; + + fullOuterSmallTableKeyMapping = 
null; + + smallTableValueMapping = null; projectionMapping = null; } @@ -138,28 +158,69 @@ public void setSlimmedBigTableValueExpressions( this.slimmedBigTableValueExpressions = slimmedBigTableValueExpressions; } - public void setBigTableRetainedMapping(VectorColumnOutputMapping bigTableRetainedMapping) { - this.bigTableRetainedMapping = bigTableRetainedMapping; + public VectorExpression[] getBigTableFilterExpressions() { + return bigTableFilterExpressions; + } + + public void setBigTableFilterExpressions(VectorExpression[] bigTableFilterExpressions) { + this.bigTableFilterExpressions = bigTableFilterExpressions; + } + + public void setBigTableRetainColumnMap(int[] bigTableRetainColumnMap) { + this.bigTableRetainColumnMap = bigTableRetainColumnMap; + } + + public int[] getBigTableRetainColumnMap() { + return bigTableRetainColumnMap; + } + + public void setBigTableRetainTypeInfos(TypeInfo[] bigTableRetainTypeInfos) { + this.bigTableRetainTypeInfos = bigTableRetainTypeInfos; + } + + public TypeInfo[] getBigTableRetainTypeInfos() { + return bigTableRetainTypeInfos; + } + + public void setNonOuterSmallTableKeyColumnMap(int[] nonOuterSmallTableKeyColumnMap) { + this.nonOuterSmallTableKeyColumnMap = nonOuterSmallTableKeyColumnMap; + } + + public int[] getNonOuterSmallTableKeyColumnMap() { + return nonOuterSmallTableKeyColumnMap; + } + + public void setNonOuterSmallTableKeyTypeInfos(TypeInfo[] nonOuterSmallTableKeyTypeInfos) { + this.nonOuterSmallTableKeyTypeInfos = nonOuterSmallTableKeyTypeInfos; + } + + public TypeInfo[] getNonOuterSmallTableKeyTypeInfos() { + return nonOuterSmallTableKeyTypeInfos; + } + + public void setOuterSmallTableKeyMapping(VectorColumnOutputMapping outerSmallTableKeyMapping) { + this.outerSmallTableKeyMapping = outerSmallTableKeyMapping; } - public VectorColumnOutputMapping getBigTableRetainedMapping() { - return bigTableRetainedMapping; + public VectorColumnOutputMapping getOuterSmallTableKeyMapping() { + return outerSmallTableKeyMapping; } - public void setBigTableOuterKeyMapping(VectorColumnOutputMapping bigTableOuterKeyMapping) { - this.bigTableOuterKeyMapping = bigTableOuterKeyMapping; + public void setFullOuterSmallTableKeyMapping( + VectorColumnSourceMapping fullOuterSmallTableKeyMapping) { + this.fullOuterSmallTableKeyMapping = fullOuterSmallTableKeyMapping; } - public VectorColumnOutputMapping getBigTableOuterKeyMapping() { - return bigTableOuterKeyMapping; + public VectorColumnSourceMapping getFullOuterSmallTableKeyMapping() { + return fullOuterSmallTableKeyMapping; } - public void setSmallTableMapping(VectorColumnSourceMapping smallTableMapping) { - this.smallTableMapping = smallTableMapping; + public void setSmallTableValueMapping(VectorColumnSourceMapping smallTableValueMapping) { + this.smallTableValueMapping = smallTableValueMapping; } - public VectorColumnSourceMapping getSmallTableMapping() { - return smallTableMapping; + public VectorColumnSourceMapping getSmallTableValueMapping() { + return smallTableValueMapping; } public void setProjectionMapping(VectorColumnSourceMapping projectionMapping) { diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestBytesBytesMultiHashMap.java ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestBytesBytesMultiHashMap.java index 9f785e6..e5c749f 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestBytesBytesMultiHashMap.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/persistence/TestBytesBytesMultiHashMap.java @@ -84,9 +84,9 @@ public void testGetNonExistent() throws Exception { 
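// Editor's note: the new trailing argument to getValueResult below (passed as null)
// is presumably the FULL OUTER match-tracking state introduced elsewhere in this
// patch; these existing tests do not exercise it.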
map.put(kv2, -1); key[0] = (byte)(key[0] + 1); BytesBytesMultiHashMap.Result hashMapResult = new BytesBytesMultiHashMap.Result(); - map.getValueResult(key, 0, key.length, hashMapResult); + map.getValueResult(key, 0, key.length, hashMapResult, null); assertTrue(!hashMapResult.hasRows()); - map.getValueResult(key, 0, 0, hashMapResult); + map.getValueResult(key, 0, 0, hashMapResult, null); assertTrue(!hashMapResult.hasRows()); } @@ -104,7 +104,7 @@ public void testPutWithFullMap() throws Exception { assertEquals(CAPACITY, map.getCapacity()); // Get of non-existent key should terminate.. BytesBytesMultiHashMap.Result hashMapResult = new BytesBytesMultiHashMap.Result(); - map.getValueResult(new byte[0], 0, 0, hashMapResult); + map.getValueResult(new byte[0], 0, 0, hashMapResult, null); } @Test @@ -123,7 +123,7 @@ public void testExpand() throws Exception { private void verifyHashMapResult(BytesBytesMultiHashMap map, byte[] key, byte[]... values) { BytesBytesMultiHashMap.Result hashMapResult = new BytesBytesMultiHashMap.Result(); - byte state = map.getValueResult(key, 0, key.length, hashMapResult); + byte state = map.getValueResult(key, 0, key.length, hashMapResult, null); HashSet hs = new HashSet(); int count = 0; if (hashMapResult.hasRows()) { diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/util/collectoroperator/CollectorTestOperator.java ql/src/test/org/apache/hadoop/hive/ql/exec/util/collectoroperator/CollectorTestOperator.java index 6491d79..244208b 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/util/collectoroperator/CollectorTestOperator.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/util/collectoroperator/CollectorTestOperator.java @@ -26,8 +26,22 @@ private static final long serialVersionUID = 1L; + private boolean isClosed; + private boolean isAborted; + public CollectorTestOperator() { super(); + + isClosed = false; + isAborted = false; + } + + public boolean getIsClosed() { + return isClosed; + } + + public boolean getIsAborted() { + return isAborted; } @Override @@ -36,6 +50,14 @@ public void process(Object row, int tag) throws HiveException { } @Override + public void closeOp(boolean abort) { + isClosed = true; + if (abort) { + isAborted = true; + } + } + + @Override public String getName() { return CollectorTestOperator.class.getSimpleName(); } diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/util/collectoroperator/RowCollectorTestOperator.java ql/src/test/org/apache/hadoop/hive/ql/exec/util/collectoroperator/RowCollectorTestOperator.java index 18933d4..ce90a6d 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/util/collectoroperator/RowCollectorTestOperator.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/util/collectoroperator/RowCollectorTestOperator.java @@ -18,6 +18,9 @@ package org.apache.hadoop.hive.ql.exec.util.collectoroperator; +import java.util.ArrayList; +import java.util.List; + import org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjects; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -28,19 +31,30 @@ private static final long serialVersionUID = 1L; private final ObjectInspector[] outputObjectInspectors; + private final int columnSize; public RowCollectorTestOperator(ObjectInspector[] outputObjectInspectors) { super(); this.outputObjectInspectors = outputObjectInspectors; + columnSize = outputObjectInspectors.length; } @Override public void process(Object row, int tag) throws HiveException { rowCount++; - Object[] rowObjectArray = (Object[]) row; - 
Object[] resultObjectArray = new Object[rowObjectArray.length]; - for (int c = 0; c < rowObjectArray.length; c++) { - resultObjectArray[c] = ((PrimitiveObjectInspector) outputObjectInspectors[c]).copyObject(rowObjectArray[c]); + Object[] resultObjectArray = new Object[columnSize]; + if (row instanceof ArrayList) { + List rowObjectList = (ArrayList) row; + for (int c = 0; c < columnSize; c++) { + resultObjectArray[c] = + ((PrimitiveObjectInspector) outputObjectInspectors[c]).copyObject(rowObjectList.get(c)); + } + } else { + Object[] rowObjectArray = (Object[]) row; + for (int c = 0; c < columnSize; c++) { + resultObjectArray[c] = + ((PrimitiveObjectInspector) outputObjectInspectors[c]).copyObject(rowObjectArray[c]); + } } nextTestRow(new RowTestObjects(resultObjectArray)); } diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/util/collectoroperator/RowVectorCollectorTestOperator.java ql/src/test/org/apache/hadoop/hive/ql/exec/util/collectoroperator/RowVectorCollectorTestOperator.java index 06cd1e9..a2f9f04 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/util/collectoroperator/RowVectorCollectorTestOperator.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/util/collectoroperator/RowVectorCollectorTestOperator.java @@ -39,6 +39,16 @@ public RowVectorCollectorTestOperator(TypeInfo[] outputTypeInfos, vectorExtractRow.init(outputTypeInfos); } + public RowVectorCollectorTestOperator( + int[] outputProjectionColumnNums, + TypeInfo[] outputTypeInfos, + ObjectInspector[] outputObjectInspectors) throws HiveException { + super(); + this.outputObjectInspectors = outputObjectInspectors; + vectorExtractRow = new VectorExtractRow(); + vectorExtractRow.init(outputTypeInfos, outputProjectionColumnNums); + } + @Override public void process(Object row, int tag) throws HiveException { VectorizedRowBatch batch = (VectorizedRowBatch) row; diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/util/rowobjects/RowTestObjectsMultiSet.java ql/src/test/org/apache/hadoop/hive/ql/exec/util/rowobjects/RowTestObjectsMultiSet.java index 51a5f8e..ec53a3d 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/util/rowobjects/RowTestObjectsMultiSet.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/util/rowobjects/RowTestObjectsMultiSet.java @@ -26,55 +26,129 @@ import org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjects; public class RowTestObjectsMultiSet { - private SortedMap sortedMap; - private int rowCount; - private int totalCount; + + public static enum RowFlag { + NONE (0), + REGULAR (0x01), + LEFT_OUTER (0x02), + FULL_OUTER (0x04); + + public final long value; + RowFlag(long value) { + this.value = value; + } + } + + private static class Value { + + // Mutable. 
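/*
 * Editor's sketch (derived from the RowFlag enum above): rowFlags is a bitmask
 * accumulated across add() calls, e.g.
 *
 *   long rowFlags = RowFlag.REGULAR.value;   // 0x1
 *   rowFlags |= RowFlag.FULL_OUTER.value;    // 0x1 | 0x4 = 0x5
 *   // displayRowFlags(rowFlags) below renders "{REGULAR, FULL_OUTER}"
 */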
+    public int count;
+    public long rowFlags;
+
+    public final int initialKeyCount;
+    public final int initialValueCount;
+    public final RowFlag initialRowFlag;
+
+    public Value(int count, RowFlag rowFlag, int totalKeyCount, int totalValueCount) {
+      this.count = count;
+      this.rowFlags = rowFlag.value;
+
+      initialKeyCount = totalKeyCount;
+      initialValueCount = totalValueCount;
+      initialRowFlag = rowFlag;
+    }
+
+    @Override
+    public String toString() {
+      StringBuilder sb = new StringBuilder();
+      sb.append("count ");
+      sb.append(count);
+      return sb.toString();
+    }
+  }
+
+  private SortedMap<RowTestObjects, Value> sortedMap;
+  private int totalKeyCount;
+  private int totalValueCount;
 
   public RowTestObjectsMultiSet() {
-    sortedMap = new TreeMap<RowTestObjects, Integer>();
-    rowCount = 0;
-    totalCount = 0;
+    sortedMap = new TreeMap<RowTestObjects, Value>();
+    totalKeyCount = 0;
+    totalValueCount = 0;
   }
 
-  public int getRowCount() {
-    return rowCount;
+  public int getTotalKeyCount() {
+    return totalKeyCount;
   }
 
-  public int getTotalCount() {
-    return totalCount;
+  public int getTotalValueCount() {
+    return totalValueCount;
   }
 
-  public void add(RowTestObjects testRow) {
+  public void add(RowTestObjects testRow, RowFlag rowFlag) {
     if (sortedMap.containsKey(testRow)) {
-      Integer count = sortedMap.get(testRow);
-      count++;
+      Value value = sortedMap.get(testRow);
+      value.count++;
+      value.rowFlags |= rowFlag.value;
+      totalValueCount++;
     } else {
-      sortedMap.put(testRow, 1);
-      rowCount++;
+      sortedMap.put(testRow, new Value(1, rowFlag, ++totalKeyCount, ++totalValueCount));
+    }
+
+  }
+
+  public void add(RowTestObjects testRow, int count) {
+    if (sortedMap.containsKey(testRow)) {
+      throw new RuntimeException();
+    }
+    sortedMap.put(testRow, new Value(count, RowFlag.NONE, ++totalKeyCount, ++totalValueCount));
+  }
+
+  public String displayRowFlags(long rowFlags) {
+    StringBuilder sb = new StringBuilder();
+    sb.append("{");
+    for (RowFlag rowFlag : RowFlag.values()) {
+      if ((rowFlags & rowFlag.value) != 0) {
+        if (sb.length() > 1) {
+          sb.append(", ");
+        }
+        sb.append(rowFlag.name());
+      }
     }
-    totalCount++;
+    sb.append("}");
+    return sb.toString();
   }
 
-  public boolean verify(RowTestObjectsMultiSet other) {
+  public boolean verify(RowTestObjectsMultiSet other, String left, String right) {
     final int thisSize = this.sortedMap.size();
     final int otherSize = other.sortedMap.size();
     if (thisSize != otherSize) {
-      System.out.println("*VERIFY* count " + thisSize + " doesn't match otherSize " + otherSize);
+      System.out.println("*BENCHMARK* " + left + " count " + thisSize + " doesn't match " + right + " " + otherSize);
       return false;
     }
-    Iterator<Entry<RowTestObjects, Integer>> thisIterator = this.sortedMap.entrySet().iterator();
-    Iterator<Entry<RowTestObjects, Integer>> otherIterator = other.sortedMap.entrySet().iterator();
+    Iterator<Entry<RowTestObjects, Value>> thisIterator = this.sortedMap.entrySet().iterator();
+    Iterator<Entry<RowTestObjects, Value>> otherIterator = other.sortedMap.entrySet().iterator();
     for (int i = 0; i < thisSize; i++) {
-      Entry<RowTestObjects, Integer> thisEntry = thisIterator.next();
-      Entry<RowTestObjects, Integer> otherEntry = otherIterator.next();
+      Entry<RowTestObjects, Value> thisEntry = thisIterator.next();
+      Entry<RowTestObjects, Value> otherEntry = otherIterator.next();
       if (!thisEntry.getKey().equals(otherEntry.getKey())) {
-        System.out.println("*VERIFY* thisEntry.getKey() " + thisEntry.getKey() + " doesn't match otherEntry.getKey() " + otherEntry.getKey());
+        System.out.println("*BENCHMARK* " + left + " row " + thisEntry.getKey().toString() +
+            " (rowFlags " + displayRowFlags(thisEntry.getValue().rowFlags) +
+            " count " + thisEntry.getValue().count + ")" +
+            " but found " + right + " row " + otherEntry.getKey().toString() +
+            " (initialKeyCount " +
+            otherEntry.getValue().initialKeyCount +
+            " initialValueCount " + otherEntry.getValue().initialValueCount + ")");
         return false;
       }
 
       // Check multi-set count.
-      if (!thisEntry.getValue().equals(otherEntry.getValue())) {
-        System.out.println("*VERIFY* key " + thisEntry.getKey() + " count " + thisEntry.getValue() + " doesn't match " + otherEntry.getValue());
+      if (thisEntry.getValue().count != otherEntry.getValue().count) {
+        System.out.println("*BENCHMARK* " + left + " row " + thisEntry.getKey().toString() +
+            " count " + thisEntry.getValue().count +
+            " (rowFlags " + displayRowFlags(thisEntry.getValue().rowFlags) + ")" +
+            " doesn't match " + right + " row count " + otherEntry.getValue().count +
+            " (initialKeyCount " +
+            otherEntry.getValue().initialKeyCount +
+            " initialValueCount " + otherEntry.getValue().initialValueCount + ")");
         return false;
       }
     }
@@ -84,6 +158,51 @@ public boolean verify(RowTestObjectsMultiSet other) {
     return true;
   }
 
+  public RowTestObjectsMultiSet subtract(RowTestObjectsMultiSet other) {
+    RowTestObjectsMultiSet result = new RowTestObjectsMultiSet();
+
+    Iterator<Entry<RowTestObjects, Value>> thisIterator = this.sortedMap.entrySet().iterator();
+    while (thisIterator.hasNext()) {
+      Entry<RowTestObjects, Value> thisEntry = thisIterator.next();
+
+      if (other.sortedMap.containsKey(thisEntry.getKey())) {
+        Value thisValue = thisEntry.getValue();
+        Value otherValue = other.sortedMap.get(thisEntry.getKey());
+        if (thisValue.count == otherValue.count) {
+          continue;
+        }
+      }
+      result.add(thisEntry.getKey(), thisEntry.getValue().count);
+    }
+
+    return result;
+  }
+
+  public void displayDifferences(RowTestObjectsMultiSet other, String left, String right) {
+
+    RowTestObjectsMultiSet leftOnly = this.subtract(other);
+    Iterator<Entry<RowTestObjects, Value>> leftOnlyIterator =
+        leftOnly.sortedMap.entrySet().iterator();
+    while (leftOnlyIterator.hasNext()) {
+      Entry<RowTestObjects, Value> leftOnlyEntry = leftOnlyIterator.next();
+      System.out.println(
+          "*BENCHMARK* " + left + " only row " + leftOnlyEntry.getKey().toString() +
+          " count " + leftOnlyEntry.getValue().count +
+          " (initialRowFlag " + leftOnlyEntry.getValue().initialRowFlag.name() + ")");
+    }
+
+    RowTestObjectsMultiSet rightOnly = other.subtract(this);
+    Iterator<Entry<RowTestObjects, Value>> rightOnlyIterator =
+        rightOnly.sortedMap.entrySet().iterator();
+    while (rightOnlyIterator.hasNext()) {
+      Entry<RowTestObjects, Value> rightOnlyEntry = rightOnlyIterator.next();
+      System.out.println(
+          "*BENCHMARK* " + right + " only row " + rightOnlyEntry.getKey().toString() +
+          " count " + rightOnlyEntry.getValue().count +
+          " (initialRowFlag " + rightOnlyEntry.getValue().initialRowFlag.name() + ")");
+    }
+  }
+
   @Override
   public String toString() {
     return sortedMap.toString();
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampWritableAndColumnVector.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampWritableAndColumnVector.java
index f163289..85e5cb3 100644
--- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampWritableAndColumnVector.java
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampWritableAndColumnVector.java
@@ -35,8 +35,6 @@
   private static int TEST_COUNT = 5000;
 
-  private static int fake = 0;
-
   @Test
   public void testDouble() throws Exception {
 
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java
index 2d0c783..6ce63a4 100644
--- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/TestVectorSelectOperator.java
@@
-62,9 +62,8 @@ public ValidatorVectorSelectOperator(CompilationOpContext ctx, OperatorDesc conf * Override forward to do validation */ @Override - public void forward(Object row, ObjectInspector rowInspector, boolean isVectorized) + public void vectorForward(VectorizedRowBatch vrg) throws HiveException { - VectorizedRowBatch vrg = (VectorizedRowBatch) row; int[] projections = vrg.projectedColumns; assertEquals(2, vrg.projectionSize); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestConfig.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestConfig.java index 0514e3f..cb68dae 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestConfig.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestConfig.java @@ -26,10 +26,14 @@ import java.util.Map; import java.util.Map.Entry; +import org.apache.commons.lang.ArrayUtils; +import org.apache.commons.lang3.tuple.ImmutablePair; +import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.CompilationOpContext; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.OperatorFactory; import org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer; import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext; @@ -37,31 +41,42 @@ import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe; import org.apache.hadoop.hive.ql.exec.util.collectoroperator.CollectorTestOperator; import org.apache.hadoop.hive.ql.exec.util.collectoroperator.CountCollectorTestOperator; +import org.apache.hadoop.hive.ql.exec.util.collectoroperator.RowCollectorTestOperator; import org.apache.hadoop.hive.ql.exec.util.collectoroperator.RowCollectorTestOperatorBase; +import org.apache.hadoop.hive.ql.exec.util.collectoroperator.RowVectorCollectorTestOperator; import org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjects; +import org.apache.hadoop.hive.ql.exec.util.rowobjects.RowTestObjectsMultiSet; import org.apache.hadoop.hive.ql.exec.vector.VectorColumnOutputMapping; import org.apache.hadoop.hive.ql.exec.vector.VectorColumnSourceMapping; +import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinBaseOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationOperator; +import org.apache.hadoop.hive.ql.exec.vector.expressions.IdentityExpression; import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastTableContainer; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VerifyFastRow; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.JoinCondDesc; import org.apache.hadoop.hive.ql.plan.JoinDesc; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.OperatorDesc; import 
org.apache.hadoop.hive.ql.plan.PlanUtils; +import org.apache.hadoop.hive.ql.plan.SelectDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; +import org.apache.hadoop.hive.ql.plan.VectorDesc; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc; import org.apache.hadoop.hive.ql.plan.VectorMapJoinInfo; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableImplementationType; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKind; import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation; +import org.apache.hadoop.hive.ql.plan.VectorSelectDesc; import org.apache.hadoop.hive.serde2.AbstractSerDe; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.SerDeUtils; @@ -90,45 +105,209 @@ NATIVE_VECTOR_FAST } + public static boolean isVectorOutput(MapJoinTestImplementation mapJoinImplementation) { + return + (mapJoinImplementation != MapJoinTestImplementation.ROW_MODE_HASH_MAP && + mapJoinImplementation != MapJoinTestImplementation.ROW_MODE_OPTIMIZED); + } + + /* + * This test collector operator is for MapJoin row-mode. + */ + public static class TestMultiSetCollectorOperator extends RowCollectorTestOperator { + + private final RowTestObjectsMultiSet testRowMultiSet; + + public TestMultiSetCollectorOperator( + ObjectInspector[] outputObjectInspectors, + RowTestObjectsMultiSet testRowMultiSet) { + super(outputObjectInspectors); + this.testRowMultiSet = testRowMultiSet; + } + + public RowTestObjectsMultiSet getTestRowMultiSet() { + return testRowMultiSet; + } + + public void nextTestRow(RowTestObjects testRow) { + testRowMultiSet.add(testRow, RowTestObjectsMultiSet.RowFlag.NONE); + } + + @Override + public String getName() { + return TestMultiSetCollectorOperator.class.getSimpleName(); + } + } + + public static class TestMultiSetVectorCollectorOperator extends RowVectorCollectorTestOperator { + + private final RowTestObjectsMultiSet testRowMultiSet; + + public RowTestObjectsMultiSet getTestRowMultiSet() { + return testRowMultiSet; + } + + public TestMultiSetVectorCollectorOperator(TypeInfo[] outputTypeInfos, + ObjectInspector[] outputObjectInspectors, RowTestObjectsMultiSet testRowMultiSet) + throws HiveException { + super(outputTypeInfos, outputObjectInspectors); + this.testRowMultiSet = testRowMultiSet; + } + + public TestMultiSetVectorCollectorOperator( + int[] outputProjectionColumnNums, + TypeInfo[] outputTypeInfos, + ObjectInspector[] outputObjectInspectors, + RowTestObjectsMultiSet testRowMultiSet) throws HiveException { + super(outputProjectionColumnNums, outputTypeInfos, outputObjectInspectors); + this.testRowMultiSet = testRowMultiSet; + } + + public void nextTestRow(RowTestObjects testRow) { + testRowMultiSet.add(testRow, RowTestObjectsMultiSet.RowFlag.NONE); + } + + @Override + public String getName() { + return TestMultiSetVectorCollectorOperator.class.getSimpleName(); + } + } + public static MapJoinDesc createMapJoinDesc(MapJoinTestDescription testDesc) { + return createMapJoinDesc(testDesc, false); + } + + public static MapJoinDesc createMapJoinDesc(MapJoinTestDescription testDesc, + boolean isFullOuterIntersect) { MapJoinDesc mapJoinDesc = new MapJoinDesc(); + mapJoinDesc.setPosBigTable(0); - List keyExpr = new ArrayList(); + + List bigTableKeyExpr = new ArrayList(); for (int i = 0; i < testDesc.bigTableKeyColumnNums.length; i++) { - keyExpr.add(new ExprNodeColumnDesc(testDesc.bigTableKeyTypeInfos[i], 
testDesc.bigTableKeyColumnNames[i], "B", false));
+      bigTableKeyExpr.add(
+          new ExprNodeColumnDesc(
+              testDesc.bigTableKeyTypeInfos[i],
+              testDesc.bigTableKeyColumnNames[i], "B", false));
     }
     Map<Byte, List<ExprNodeDesc>> keyMap = new HashMap<Byte, List<ExprNodeDesc>>();
-    keyMap.put((byte)0, keyExpr);
+    keyMap.put((byte) 0, bigTableKeyExpr);
+
+    mapJoinDesc.setFullOuterIntersect(isFullOuterIntersect);
+
+    // Big Table expression includes all columns -- keys and extra (value) columns.
+    // UNDONE: Assumes all values retained...
+    List<ExprNodeDesc> bigTableExpr = new ArrayList<ExprNodeDesc>();
+    for (int i = 0; i < testDesc.bigTableColumnNames.length; i++) {
+      bigTableExpr.add(
+          new ExprNodeColumnDesc(
+              testDesc.bigTableTypeInfos[i],
+              testDesc.bigTableColumnNames[i], "B", false));
+    }
+
+    Map<Byte, List<ExprNodeDesc>> exprMap = new HashMap<Byte, List<ExprNodeDesc>>();
+    exprMap.put((byte) 0, bigTableExpr);
+
+    List<ExprNodeDesc> smallTableKeyExpr = new ArrayList<ExprNodeDesc>();
+
+    for (int i = 0; i < testDesc.smallTableKeyTypeInfos.length; i++) {
+      ExprNodeColumnDesc exprNodeColumnDesc =
+          new ExprNodeColumnDesc(
+              testDesc.smallTableKeyTypeInfos[i],
+              testDesc.smallTableKeyColumnNames[i], "S", false);
+      smallTableKeyExpr.add(exprNodeColumnDesc);
+    }
 
     // Retained Small Table keys and values.
     List<ExprNodeDesc> smallTableExpr = new ArrayList<ExprNodeDesc>();
-    for (int i = 0; i < testDesc.smallTableValueColumnNames.length; i++) {
-      smallTableExpr.add(new ExprNodeColumnDesc(testDesc.smallTableValueTypeInfos[i], testDesc.smallTableValueColumnNames[i], "S", false));
+    final int smallTableRetainKeySize = testDesc.smallTableRetainKeyColumnNums.length;
+    for (int i = 0; i < smallTableRetainKeySize; i++) {
+      int smallTableKeyColumnNum = testDesc.smallTableRetainKeyColumnNums[i];
+      smallTableExpr.add(
+          new ExprNodeColumnDesc(
+              testDesc.smallTableTypeInfos[smallTableKeyColumnNum],
+              testDesc.smallTableColumnNames[smallTableKeyColumnNum], "S", false));
+    }
+
+    final int smallTableRetainValueSize = testDesc.smallTableRetainValueColumnNums.length;
+    for (int i = 0; i < smallTableRetainValueSize; i++) {
+      int smallTableValueColumnNum =
+          smallTableRetainKeySize + testDesc.smallTableRetainValueColumnNums[i];
+      smallTableExpr.add(
+          new ExprNodeColumnDesc(
+              testDesc.smallTableTypeInfos[smallTableValueColumnNum],
+              testDesc.smallTableColumnNames[smallTableValueColumnNum], "S", false));
     }
-    keyMap.put((byte)1, smallTableExpr);
+
+    keyMap.put((byte) 1, smallTableKeyExpr);
+    exprMap.put((byte) 1, smallTableExpr);
 
     mapJoinDesc.setKeys(keyMap);
-    mapJoinDesc.setExprs(keyMap);
+    mapJoinDesc.setExprs(exprMap);
 
     Byte[] order = new Byte[] {(byte) 0, (byte) 1};
     mapJoinDesc.setTagOrder(order);
-    mapJoinDesc.setNoOuterJoin(testDesc.vectorMapJoinVariation != VectorMapJoinVariation.OUTER);
+    mapJoinDesc.setNoOuterJoin(
+        testDesc.vectorMapJoinVariation != VectorMapJoinVariation.OUTER &&
+        testDesc.vectorMapJoinVariation != VectorMapJoinVariation.FULL_OUTER);
 
     Map<Byte, List<ExprNodeDesc>> filterMap = new HashMap<Byte, List<ExprNodeDesc>>();
     filterMap.put((byte) 0, new ArrayList<ExprNodeDesc>());  // None.
     mapJoinDesc.setFilters(filterMap);
 
     List<Integer> bigTableRetainColumnNumsList = intArrayToList(testDesc.bigTableRetainColumnNums);
-
-    // For now, just small table values...
-    List<Integer> smallTableRetainColumnNumsList = intArrayToList(testDesc.smallTableRetainValueColumnNums);
-
     Map<Byte, List<Integer>> retainListMap = new HashMap<Byte, List<Integer>>();
     retainListMap.put((byte) 0, bigTableRetainColumnNumsList);
-    retainListMap.put((byte) 1, smallTableRetainColumnNumsList);
+
+    // For now, just small table keys/values...
+    if (testDesc.smallTableRetainKeyColumnNums.length == 0) {
+
+      // Just the value columns numbers with retain.
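/*
 * Editor's sketch of the valueIndices encoding built in the else-branch below
 * (semantics taken from the comments in this hunk): a non-negative index selects
 * a big-table key column for the small-table result area; a negative index n
 * selects field (-n - 1) of the deserialized small-table LazyBinary value row.
 *
 *   static int encodeValueField(int fieldIndex) { return -fieldIndex - 1; }
 *   static int decodeValueField(int encoded)    { return -encoded - 1; }
 *   // encodeValueField(2) == -3; decodeValueField(-3) == 2
 */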
+      List<Integer> smallTableValueRetainColumnNumsList =
+          intArrayToList(testDesc.smallTableRetainValueColumnNums);
+
+      retainListMap.put((byte) 1, smallTableValueRetainColumnNumsList);
+    } else {
+
+      // Both the key/value columns numbers.
+
+      // Zero and above numbers indicate a big table key is needed for
+      // small table result "area".
+
+      // Negative numbers indicate a column to be (deserialize) read from the small table's
+      // LazyBinary value row.
+
+      ArrayList<Integer> smallTableValueIndicesNumsList = new ArrayList<Integer>();
+      for (int i = 0; i < testDesc.smallTableRetainKeyColumnNums.length; i++) {
+        smallTableValueIndicesNumsList.add(testDesc.smallTableRetainKeyColumnNums[i]);
+      }
+      for (int i = 0; i < testDesc.smallTableRetainValueColumnNums.length; i++) {
+        smallTableValueIndicesNumsList.add(-testDesc.smallTableRetainValueColumnNums[i] - 1);
+      }
+      int[] smallTableValueIndicesNums =
+          ArrayUtils.toPrimitive(smallTableValueIndicesNumsList.toArray(new Integer[0]));
+
+      Map<Byte, int[]> valueIndicesMap = new HashMap<Byte, int[]>();
+      valueIndicesMap.put((byte) 1, smallTableValueIndicesNums);
+      mapJoinDesc.setValueIndices(valueIndicesMap);
+    }
 
     mapJoinDesc.setRetainList(retainListMap);
 
+    switch (testDesc.mapJoinPlanVariation) {
+    case DYNAMIC_PARTITION_HASH_JOIN:
+      // FULL OUTER behaves differently under dynamic partition hash join.
+      mapJoinDesc.setDynamicPartitionHashJoin(true);
+      break;
+    case SHARED_SMALL_TABLE:
+      mapJoinDesc.setDynamicPartitionHashJoin(false);
+      break;
+    default:
+      throw new RuntimeException(
+          "Unexpected map join plan variation " + testDesc.mapJoinPlanVariation);
+    }
+
     int joinDescType;
     switch (testDesc.vectorMapJoinVariation) {
     case INNER:
@@ -141,6 +320,9 @@ public static MapJoinDesc createMapJoinDesc(MapJoinTestDescription testDesc) {
     case OUTER:
       joinDescType = JoinDesc.LEFT_OUTER_JOIN;
       break;
+    case FULL_OUTER:
+      joinDescType = JoinDesc.FULL_OUTER_JOIN;
+      break;
     default:
       throw new RuntimeException("unknown operator variation " + testDesc.vectorMapJoinVariation);
     }
@@ -149,12 +331,25 @@ public static MapJoinDesc createMapJoinDesc(MapJoinTestDescription testDesc) {
     mapJoinDesc.setConds(conds);
 
     TableDesc keyTableDesc = PlanUtils.getMapJoinKeyTableDesc(testDesc.hiveConf, PlanUtils
-        .getFieldSchemasFromColumnList(keyExpr, ""));
+        .getFieldSchemasFromColumnList(smallTableKeyExpr, ""));
     mapJoinDesc.setKeyTblDesc(keyTableDesc);
 
+    // Small Table expression value columns.
+    List<ExprNodeDesc> smallTableValueExpr = new ArrayList<ExprNodeDesc>();
+
+    // All Small Table keys and values.
+    for (int i = 0; i < testDesc.smallTableValueColumnNames.length; i++) {
+      smallTableValueExpr.add(
+          new ExprNodeColumnDesc(
+              testDesc.smallTableValueTypeInfos[i],
+              testDesc.smallTableValueColumnNames[i], "S", false));
+    }
+
     TableDesc valueTableDesc = PlanUtils.getMapJoinValueTableDesc(
-        PlanUtils.getFieldSchemasFromColumnList(smallTableExpr, ""));
+        PlanUtils.getFieldSchemasFromColumnList(smallTableValueExpr, ""));
     ArrayList<TableDesc> valueTableDescsList = new ArrayList<TableDesc>();
+
+    // Big Table entry, then Small Table entry.
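+    // Position 0 (the big table) is a null placeholder; only the small table
+    // needs a value TableDesc here.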
     valueTableDescsList.add(null);
     valueTableDescsList.add(valueTableDesc);
     mapJoinDesc.setValueTblDescs(valueTableDescsList);
@@ -180,6 +375,7 @@ public static VectorMapJoinDesc createVectorMapJoinDesc(MapJoinTestDescription t
       hashTableKind = HashTableKind.HASH_SET;
       break;
     case OUTER:
+    case FULL_OUTER:
       hashTableKind = HashTableKind.HASH_MAP;
       break;
     default:
@@ -190,9 +386,17 @@ public static VectorMapJoinDesc createVectorMapJoinDesc(MapJoinTestDescription t
     if (testDesc.bigTableKeyTypeInfos.length == 1) {
       switch (((PrimitiveTypeInfo) testDesc.bigTableKeyTypeInfos[0]).getPrimitiveCategory()) {
       case BOOLEAN:
+        hashTableKeyType = HashTableKeyType.BOOLEAN;
+        break;
       case BYTE:
+        hashTableKeyType = HashTableKeyType.BYTE;
+        break;
       case SHORT:
+        hashTableKeyType = HashTableKeyType.SHORT;
+        break;
       case INT:
+        hashTableKeyType = HashTableKeyType.INT;
+        break;
       case LONG:
         hashTableKeyType = HashTableKeyType.LONG;
         break;
@@ -216,49 +420,112 @@ public static VectorMapJoinDesc createVectorMapJoinDesc(MapJoinTestDescription t
 
     vectorDesc.setAllBigTableKeyExpressions(null);
 
-    vectorMapJoinInfo.setBigTableValueColumnMap(new int[0]);
-    vectorMapJoinInfo.setBigTableValueColumnNames(new String[0]);
-    vectorMapJoinInfo.setBigTableValueTypeInfos(new TypeInfo[0]);
+    vectorMapJoinInfo.setBigTableValueColumnMap(testDesc.bigTableColumnNums);
+    vectorMapJoinInfo.setBigTableValueColumnNames(testDesc.bigTableColumnNames);
+    vectorMapJoinInfo.setBigTableValueTypeInfos(testDesc.bigTableTypeInfos);
 
     vectorMapJoinInfo.setSlimmedBigTableValueExpressions(null);
 
     vectorDesc.setAllBigTableValueExpressions(null);
 
+    vectorMapJoinInfo.setBigTableFilterExpressions(new VectorExpression[0]);
+
+    /*
+     * Column mapping.
+     */
+    VectorColumnOutputMapping bigTableRetainMapping =
+        new VectorColumnOutputMapping("Big Table Retain Mapping");
+
+    VectorColumnOutputMapping nonOuterSmallTableKeyMapping =
+        new VectorColumnOutputMapping("Non Outer Small Table Key Mapping");
+
+    VectorColumnOutputMapping outerSmallTableKeyMapping =
+        new VectorColumnOutputMapping("Outer Small Table Key Mapping");
+
+    VectorColumnSourceMapping fullOuterSmallTableKeyMapping =
+        new VectorColumnSourceMapping("Full Outer Small Table Key Mapping");
+
     VectorColumnSourceMapping projectionMapping = new VectorColumnSourceMapping("Projection Mapping");
 
+    int nextOutputColumn = 0;
-    VectorColumnOutputMapping bigTableRetainedMapping =
-        new VectorColumnOutputMapping("Big Table Retained Mapping");
-    for (int i = 0; i < testDesc.bigTableTypeInfos.length; i++) {
-      bigTableRetainedMapping.add(i, i, testDesc.bigTableTypeInfos[i]);
-      projectionMapping.add(i, i, testDesc.bigTableKeyTypeInfos[i]);
+    final int bigTableRetainedSize = testDesc.bigTableRetainColumnNums.length;
+    for (int i = 0; i < bigTableRetainedSize; i++) {
+      final int batchColumnIndex = testDesc.bigTableRetainColumnNums[i];
+      TypeInfo typeInfo = testDesc.bigTableTypeInfos[i];
+      projectionMapping.add(
+          nextOutputColumn, batchColumnIndex, typeInfo);
+      // Collect columns we copy from the big table batch to the overflow batch.
+      if (!bigTableRetainMapping.containsOutputColumn(batchColumnIndex)) {
+
+        // Tolerate repeated use of a big table column.
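+        // (containsOutputColumn() keeps the copy list duplicate-free even when the
+        // retain list references the same batch column more than once.)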
+ bigTableRetainMapping.add(batchColumnIndex, batchColumnIndex, typeInfo); + } + nextOutputColumn++; } - VectorColumnOutputMapping bigTableOuterKeyMapping = - new VectorColumnOutputMapping("Big Table Outer Key Mapping"); + boolean isOuterJoin = + (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.OUTER || + testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER); + + int emulateScratchColumn = testDesc.bigTableTypeInfos.length; + + VectorColumnOutputMapping smallTableKeyOutputMapping = + new VectorColumnOutputMapping("Small Table Key Output Mapping"); + final int smallTableKeyRetainSize = testDesc.smallTableRetainKeyColumnNums.length; + for (int i = 0; i < testDesc.smallTableRetainKeyColumnNums.length; i++) { + final int smallTableKeyColumnNum = testDesc.smallTableRetainKeyColumnNums[i]; + final int bigTableKeyColumnNum = testDesc.bigTableKeyColumnNums[smallTableKeyColumnNum]; + TypeInfo keyTypeInfo = testDesc.smallTableKeyTypeInfos[smallTableKeyColumnNum]; + if (!isOuterJoin) { + // Project the big table key into the small table result "area". + projectionMapping.add(nextOutputColumn, bigTableKeyColumnNum, keyTypeInfo); + if (!bigTableRetainMapping.containsOutputColumn(bigTableKeyColumnNum)) { + nonOuterSmallTableKeyMapping.add(bigTableKeyColumnNum, bigTableKeyColumnNum, keyTypeInfo); + } + } else { + outerSmallTableKeyMapping.add(bigTableKeyColumnNum, emulateScratchColumn, keyTypeInfo); + projectionMapping.add(nextOutputColumn, emulateScratchColumn, keyTypeInfo); + + // For FULL OUTER MapJoin, we need to be able to deserialize a Small Table key + // into the output result. + fullOuterSmallTableKeyMapping.add(smallTableKeyColumnNum, emulateScratchColumn, keyTypeInfo); + emulateScratchColumn++; + } + nextOutputColumn++; + } // The order of the fields in the LazyBinary small table value must be used, so // we use the source ordering flavor for the mapping. - VectorColumnSourceMapping smallTableMapping = - new VectorColumnSourceMapping("Small Table Mapping"); - int outputColumn = testDesc.bigTableTypeInfos.length; + VectorColumnSourceMapping smallTableValueMapping = + new VectorColumnSourceMapping("Small Table Value Mapping"); for (int i = 0; i < testDesc.smallTableValueTypeInfos.length; i++) { - smallTableMapping.add(i, outputColumn, testDesc.smallTableValueTypeInfos[i]); - projectionMapping.add(outputColumn, outputColumn, testDesc.smallTableValueTypeInfos[i]); - outputColumn++; + smallTableValueMapping.add(i, emulateScratchColumn, testDesc.smallTableValueTypeInfos[i]); + projectionMapping.add(nextOutputColumn, emulateScratchColumn, testDesc.smallTableValueTypeInfos[i]); + emulateScratchColumn++; + nextOutputColumn++; } // Convert dynamic arrays and maps to simple arrays. 
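/*
 * Editor's sketch of the mapping life cycle used in this method (all calls appear
 * in this patch): mappings are built dynamically with add(), then finalize()
 * freezes them into the parallel arrays consumed by VectorMapJoinInfo.
 *
 *   VectorColumnOutputMapping m = new VectorColumnOutputMapping("example");
 *   m.add(3, 3, typeInfo);          // source column 3 -> output column 3
 *   m.finalize();
 *   int[] outputColumns = m.getOutputColumns();
 *   TypeInfo[] typeInfos = m.getTypeInfos();
 */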
- bigTableRetainedMapping.finalize(); + bigTableRetainMapping.finalize(); + vectorMapJoinInfo.setBigTableRetainColumnMap(bigTableRetainMapping.getOutputColumns()); + vectorMapJoinInfo.setBigTableRetainTypeInfos(bigTableRetainMapping.getTypeInfos()); + + nonOuterSmallTableKeyMapping.finalize(); + vectorMapJoinInfo.setNonOuterSmallTableKeyColumnMap(nonOuterSmallTableKeyMapping.getOutputColumns()); + vectorMapJoinInfo.setNonOuterSmallTableKeyTypeInfos(nonOuterSmallTableKeyMapping.getTypeInfos()); - bigTableOuterKeyMapping.finalize(); + outerSmallTableKeyMapping.finalize(); + fullOuterSmallTableKeyMapping.finalize(); - smallTableMapping.finalize(); + vectorMapJoinInfo.setOuterSmallTableKeyMapping(outerSmallTableKeyMapping); + vectorMapJoinInfo.setFullOuterSmallTableKeyMapping(fullOuterSmallTableKeyMapping); - vectorMapJoinInfo.setBigTableRetainedMapping(bigTableRetainedMapping); - vectorMapJoinInfo.setBigTableOuterKeyMapping(bigTableOuterKeyMapping); - vectorMapJoinInfo.setSmallTableMapping(smallTableMapping); + smallTableValueMapping.finalize(); + + vectorMapJoinInfo.setSmallTableValueMapping(smallTableValueMapping); projectionMapping.finalize(); @@ -267,7 +534,9 @@ public static VectorMapJoinDesc createVectorMapJoinDesc(MapJoinTestDescription t vectorMapJoinInfo.setProjectionMapping(projectionMapping); - assert projectionMapping.getCount() == testDesc.outputColumnNames.length; + if (projectionMapping.getCount() != testDesc.outputColumnNames.length) { + throw new RuntimeException("Projection mapping count " + projectionMapping.getCount() + " does not match output column count " + testDesc.outputColumnNames.length); + } vectorDesc.setVectorMapJoinInfo(vectorMapJoinInfo); @@ -306,6 +575,17 @@ public static VectorMapJoinCommonOperator createNativeVectorMapJoinOperator( new VectorMapJoinOuterLongOperator(new CompilationOpContext(), mapJoinDesc, vContext, vectorDesc); break; + case FULL_OUTER: + if (mapJoinDesc.isFullOuterIntersect()) { + operator = + new VectorMapJoinFullOuterIntersectLongOperator(new CompilationOpContext(), + mapJoinDesc, vContext, vectorDesc); + } else { + operator = + new VectorMapJoinFullOuterLongOperator(new CompilationOpContext(), + mapJoinDesc, vContext, vectorDesc); + } + break; default: throw new RuntimeException("unknown operator variation " + vectorMapJoinVariation); } @@ -331,6 +611,17 @@ public static VectorMapJoinCommonOperator createNativeVectorMapJoinOperator( operator = new VectorMapJoinOuterStringOperator(new CompilationOpContext(), mapJoinDesc, vContext, vectorDesc); + break; + case FULL_OUTER: + if (mapJoinDesc.isFullOuterIntersect()) { + operator = + new VectorMapJoinFullOuterIntersectStringOperator(new CompilationOpContext(), + mapJoinDesc, vContext, vectorDesc); + } else { + operator = + new VectorMapJoinFullOuterStringOperator(new CompilationOpContext(), + mapJoinDesc, vContext, vectorDesc); + } break; default: throw new RuntimeException("unknown operator variation " + vectorMapJoinVariation); @@ -358,6 +648,17 @@ public static VectorMapJoinCommonOperator createNativeVectorMapJoinOperator( new VectorMapJoinOuterMultiKeyOperator(new CompilationOpContext(), mapJoinDesc, vContext, vectorDesc); break; + case FULL_OUTER: + if (mapJoinDesc.isFullOuterIntersect()) { + operator = + new VectorMapJoinFullOuterIntersectMultiKeyOperator(new CompilationOpContext(), + mapJoinDesc, vContext, vectorDesc); + } else { + operator = + new VectorMapJoinFullOuterMultiKeyOperator(new CompilationOpContext(), + mapJoinDesc, vContext, vectorDesc); + } + break; default: throw new RuntimeException("unknown operator variation " + vectorMapJoinVariation); } @@ -365,16 +666,31 @@ public static VectorMapJoinCommonOperator
createNativeVectorMapJoinOperator( default: throw new RuntimeException("Unknown hash table key type " + vectorDesc.getHashTableKeyType()); } + System.out.println("*BENCHMARK* createNativeVectorMapJoinOperator " + + operator.getClass().getSimpleName()); return operator; } public static VectorizationContext createVectorizationContext(MapJoinTestDescription testDesc) throws HiveException { VectorizationContext vContext = - new VectorizationContext("test", testDesc.bigTableColumnNamesList); + new VectorizationContext("test", testDesc.bigTableColumnNameList); + + boolean isOuterJoin = + (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.OUTER || + testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER); + + if (isOuterJoin) { + + // We need physical columns. + for (int i = 0; i < testDesc.smallTableRetainKeyColumnNums.length; i++) { + final int smallTableKeyRetainColumnNum = testDesc.smallTableRetainKeyColumnNums[i]; + vContext.allocateScratchColumn(testDesc.smallTableKeyTypeInfos[smallTableKeyRetainColumnNum]); + } + } // Create scratch columns to hold small table results. - for (int i = 0; i < testDesc.smallTableValueTypeInfos.length; i++) { + for (int i = 0; i < testDesc.smallTableRetainValueColumnNums.length; i++) { vContext.allocateScratchColumn(testDesc.smallTableValueTypeInfos[i]); } return vContext; @@ -390,19 +706,19 @@ public static MapJoinTableContainerSerDe createMapJoinTableContainerSerDe(MapJoi final Byte smallTablePos = 1; - // UNDONE: Why do we need to specify BinarySortableSerDe explicitly here??? TableDesc keyTableDesc = mapJoinDesc.getKeyTblDesc(); AbstractSerDe keySerializer = (AbstractSerDe) ReflectionUtil.newInstance( BinarySortableSerDe.class, null); SerDeUtils.initializeSerDe(keySerializer, null, keyTableDesc.getProperties(), null); MapJoinObjectSerDeContext keyContext = new MapJoinObjectSerDeContext(keySerializer, false); - TableDesc valueTableDesc; + final List valueTableDescList; if (mapJoinDesc.getNoOuterJoin()) { - valueTableDesc = mapJoinDesc.getValueTblDescs().get(smallTablePos); + valueTableDescList = mapJoinDesc.getValueTblDescs(); } else { - valueTableDesc = mapJoinDesc.getValueFilteredTblDescs().get(smallTablePos); + valueTableDescList = mapJoinDesc.getValueFilteredTblDescs(); } + TableDesc valueTableDesc = valueTableDescList.get(smallTablePos); AbstractSerDe valueSerDe = (AbstractSerDe) ReflectionUtil.newInstance( valueTableDesc.getDeserializerClass(), null); SerDeUtils.initializeSerDe(valueSerDe, null, valueTableDesc.getProperties(), null); @@ -414,16 +730,19 @@ public static MapJoinTableContainerSerDe createMapJoinTableContainerSerDe(MapJoi } public static void connectOperators( - MapJoinTestDescription testDesc, Operator operator, - Operator testCollectorOperator) throws HiveException { - Operator[] parents = new Operator[] {operator}; - testCollectorOperator.setParentOperators(Arrays.asList(parents)); - Operator[] childOperators = new Operator[] {testCollectorOperator}; - operator.setChildOperators(Arrays.asList(childOperators)); - HiveConf.setBoolVar(testDesc.hiveConf, - HiveConf.ConfVars.HIVE_MAPJOIN_TESTING_NO_HASH_TABLE_LOAD, true); - operator.initialize(testDesc.hiveConf, testDesc.inputObjectInspectors); + Operator childOperator) throws HiveException { + + List> newParentOperators = newOperatorList(); + newParentOperators.addAll(childOperator.getParentOperators()); + newParentOperators.add(operator); + childOperator.setParentOperators(newParentOperators); + + List> newChildOperators = newOperatorList(); + 
newChildOperators.addAll(operator.getChildOperators()); + newChildOperators.add(childOperator); + operator.setChildOperators(newChildOperators); + } private static List<Integer> intArrayToList(int[] intArray) { @@ -509,9 +828,25 @@ private static void loadTableContainerData(MapJoinTestDescription testDesc, MapJ mapJoinTableContainer.seal(); } - public static MapJoinOperator createMapJoin(MapJoinTestDescription testDesc, - Operator collectorOperator, MapJoinTestData testData, - MapJoinDesc mapJoinDesc, boolean isVectorMapJoin, boolean isOriginalMapJoin) + public static class CreateMapJoinResult { + public final MapJoinOperator mapJoinOperator; + public final MapJoinTableContainer mapJoinTableContainer; + public final MapJoinTableContainerSerDe mapJoinTableContainerSerDe; + + public CreateMapJoinResult( + MapJoinOperator mapJoinOperator, + MapJoinTableContainer mapJoinTableContainer, + MapJoinTableContainerSerDe mapJoinTableContainerSerDe) { + this.mapJoinOperator = mapJoinOperator; + this.mapJoinTableContainer = mapJoinTableContainer; + this.mapJoinTableContainerSerDe = mapJoinTableContainerSerDe; + } + } + public static CreateMapJoinResult createMapJoin( + MapJoinTestDescription testDesc, + MapJoinTestData testData, + MapJoinDesc mapJoinDesc, boolean isVectorMapJoin, boolean isOriginalMapJoin, + MapJoinTableContainer shareMapJoinTableContainer) throws SerDeException, IOException, HiveException { final Byte bigTablePos = 0; @@ -539,11 +874,16 @@ public static MapJoinOperator createMapJoin(MapJoinTestDescription testDesc, operator = new MapJoinOperator(new CompilationOpContext()); operator.setConf(mapJoinDesc); } else { - VectorizationContext vContext = new VectorizationContext("test", testDesc.bigTableColumnNamesList); + VectorizationContext vContext = + new VectorizationContext("test", testDesc.bigTableColumnNameList); + + /* + // UNDONE: Unclear this belongs in the input VectorizationContext... + // Create scratch columns to hold small table results. for (int i = 0; i < testDesc.smallTableValueTypeInfos.length; i++) { vContext.allocateScratchColumn(testDesc.smallTableValueTypeInfos[i]); } + */ // This is what the Vectorizer class does.
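/*
 * In the real planner path, the Vectorizer records its vectorization
 * decisions in a VectorMapJoinDesc and asks OperatorFactory.getVectorOperator()
 * to instantiate the vectorized operator class. A hedged outline of that
 * sequence (mirroring calls used elsewhere in this file; error handling
 * omitted):
 *
 *   VectorMapJoinDesc vDesc = new VectorMapJoinDesc();
 *   mapJoinDesc.setVectorDesc(vDesc);
 *   MapJoinOperator vectorOp = (MapJoinOperator)
 *       OperatorFactory.getVectorOperator(
 *           new CompilationOpContext(), mapJoinDesc, vContext, vDesc);
 */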
VectorMapJoinDesc vectorMapJoinDesc = new VectorMapJoinDesc(); @@ -571,21 +911,20 @@ public static MapJoinOperator createMapJoin(MapJoinTestDescription testDesc, } } - MapJoinTestConfig.connectOperators(testDesc, operator, collectorOperator); - - operator.setTestMapJoinTableContainer(1, mapJoinTableContainer, mapJoinTableContainerSerDe); + HiveConf.setBoolVar(testDesc.hiveConf, + HiveConf.ConfVars.HIVE_MAPJOIN_TESTING_NO_HASH_TABLE_LOAD, true); - return operator; + return new CreateMapJoinResult(operator, mapJoinTableContainer, mapJoinTableContainerSerDe); } - public static MapJoinOperator createNativeVectorMapJoin(MapJoinTestDescription testDesc, - Operator collectorOperator, MapJoinTestData testData, - MapJoinDesc mapJoinDesc, HashTableImplementationType hashTableImplementationType) + public static CreateMapJoinResult createNativeVectorMapJoin( + MapJoinTestDescription testDesc, + MapJoinTestData testData, + MapJoinDesc mapJoinDesc, HashTableImplementationType hashTableImplementationType, + MapJoinTableContainer shareMapJoinTableContainer) throws SerDeException, IOException, HiveException { VectorMapJoinDesc vectorDesc = MapJoinTestConfig.createVectorMapJoinDesc(testDesc); - - // UNDONE mapJoinDesc.setVectorDesc(vectorDesc); vectorDesc.setHashTableImplementationType(hashTableImplementationType); @@ -593,13 +932,14 @@ public static MapJoinOperator createNativeVectorMapJoin(MapJoinTestDescription t VectorMapJoinInfo vectorMapJoinInfo = vectorDesc.getVectorMapJoinInfo(); MapJoinTableContainer mapJoinTableContainer; + MapJoinTableContainerSerDe mapJoinTableContainerSerDe = null; switch (vectorDesc.getHashTableImplementationType()) { case OPTIMIZED: mapJoinTableContainer = new MapJoinBytesTableContainer( testDesc.hiveConf, null, testData.smallTableKeyHashMap.size(), 0); - MapJoinTableContainerSerDe mapJoinTableContainerSerDe = + mapJoinTableContainerSerDe = MapJoinTestConfig.createMapJoinTableContainerSerDe(mapJoinDesc); mapJoinTableContainer.setSerde( @@ -615,7 +955,11 @@ public static MapJoinOperator createNativeVectorMapJoin(MapJoinTestDescription t throw new RuntimeException("Unexpected hash table implementation type " + vectorDesc.getHashTableImplementationType()); } - loadTableContainerData(testDesc, testData, mapJoinTableContainer); +// if (shareMapJoinTableContainer == null) { + loadTableContainerData(testDesc, testData, mapJoinTableContainer); +// } else { +// setTableContainerData(mapJoinTableContainer, shareMapJoinTableContainer); +// } VectorizationContext vContext = MapJoinTestConfig.createVectorizationContext(testDesc); @@ -636,56 +980,295 @@ public static MapJoinOperator createNativeVectorMapJoin(MapJoinTestDescription t vectorDesc, vContext); - MapJoinTestConfig.connectOperators(testDesc, operator, collectorOperator); + HiveConf.setBoolVar(testDesc.hiveConf, + HiveConf.ConfVars.HIVE_MAPJOIN_TESTING_NO_HASH_TABLE_LOAD, true); - operator.setTestMapJoinTableContainer(1, mapJoinTableContainer, null); + return new CreateMapJoinResult(operator, mapJoinTableContainer, mapJoinTableContainerSerDe); + } - return operator; + public static CreateMapJoinResult createMapJoinImplementation( + MapJoinTestImplementation mapJoinImplementation, + MapJoinTestDescription testDesc, + MapJoinTestData testData, + MapJoinDesc mapJoinDesc) + throws SerDeException, IOException, HiveException { + return createMapJoinImplementation( + mapJoinImplementation, testDesc, testData, mapJoinDesc, null); } - public static MapJoinOperator createMapJoinImplementation(MapJoinTestImplementation mapJoinImplementation, 
+ public static CreateMapJoinResult createMapJoinImplementation( + MapJoinTestImplementation mapJoinImplementation, MapJoinTestDescription testDesc, - Operator testCollectorOperator, MapJoinTestData testData, - MapJoinDesc mapJoinDesc) throws SerDeException, IOException, HiveException { + MapJoinTestData testData, + MapJoinDesc mapJoinDesc, + MapJoinTableContainer shareMapJoinTableContainer) + throws SerDeException, IOException, HiveException { - MapJoinOperator operator; + CreateMapJoinResult result; switch (mapJoinImplementation) { case ROW_MODE_HASH_MAP: // MapJoinOperator - operator = MapJoinTestConfig.createMapJoin( - testDesc, testCollectorOperator, testData, mapJoinDesc, /* isVectorMapJoin */ false, - /* isOriginalMapJoin */ true); + result = MapJoinTestConfig.createMapJoin( + testDesc, testData, mapJoinDesc, /* isVectorMapJoin */ false, + /* isOriginalMapJoin */ true, + shareMapJoinTableContainer); break; case ROW_MODE_OPTIMIZED: // MapJoinOperator - operator = MapJoinTestConfig.createMapJoin( - testDesc, testCollectorOperator, testData, mapJoinDesc, /* isVectorMapJoin */ false, - /* isOriginalMapJoin */ false); + result = MapJoinTestConfig.createMapJoin( + testDesc, testData, mapJoinDesc, /* isVectorMapJoin */ false, + /* isOriginalMapJoin */ false, + shareMapJoinTableContainer); break; case VECTOR_PASS_THROUGH: // VectorMapJoinOperator - operator = MapJoinTestConfig.createMapJoin( - testDesc, testCollectorOperator, testData, mapJoinDesc, /* isVectorMapJoin */ true, - /* n/a */ false); + result = MapJoinTestConfig.createMapJoin( + testDesc, testData, mapJoinDesc, /* isVectorMapJoin */ true, + /* n/a */ false, + shareMapJoinTableContainer); break; case NATIVE_VECTOR_OPTIMIZED: - operator = MapJoinTestConfig.createNativeVectorMapJoin( - testDesc, testCollectorOperator, testData, mapJoinDesc, HashTableImplementationType.OPTIMIZED); + result = MapJoinTestConfig.createNativeVectorMapJoin( + testDesc, testData, mapJoinDesc, + HashTableImplementationType.OPTIMIZED, + shareMapJoinTableContainer); break; case NATIVE_VECTOR_FAST: - operator = MapJoinTestConfig.createNativeVectorMapJoin( - testDesc, testCollectorOperator, testData, mapJoinDesc, HashTableImplementationType.FAST); + result = MapJoinTestConfig.createNativeVectorMapJoin( + testDesc, testData, mapJoinDesc, + HashTableImplementationType.FAST, + shareMapJoinTableContainer); break; default: throw new RuntimeException("Unexpected MapJoin Operator Implementation " + mapJoinImplementation); } - return operator; + return result; + } + + private static Operator makeInterceptSelectOperator( + MapJoinOperator mapJoinOperator, int bigTableKeySize, int bigTableRetainSize, + String[] outputColumnNames, TypeInfo[] outputTypeInfos) { + + MapJoinDesc mapJoinDesc = (MapJoinDesc) mapJoinOperator.getConf(); + + List selectExprList = new ArrayList(); + List selectOutputColumnNameList = new ArrayList(); + for (int i = 0; i < bigTableRetainSize; i++) { + String selectOutputColumnName = "_col" + i; + selectOutputColumnNameList.add(selectOutputColumnName); + + TypeInfo outputTypeInfo = outputTypeInfos[i]; + if (i < bigTableKeySize) { + + // Big Table key. + ExprNodeColumnDesc keyColumnExpr = + new ExprNodeColumnDesc( + outputTypeInfo, + outputColumnNames[i], "test", false); + selectExprList.add(keyColumnExpr); + } else { + + // For row-mode, substitute NULL constant for any non-key extra Big Table columns. 
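/*
 * In SQL terms, the intercept SELECT passes the Big Table keys through and
 * blanks every other retained column; for two keys and one extra column it
 * behaves like SELECT _col0, _col1, CAST(NULL AS <type>). A hedged sketch of
 * that shape using strings instead of ExprNodeDesc (purely illustrative):
 *
 *   static java.util.List<String> interceptProjection(String[] cols, int keySize) {
 *     java.util.List<String> exprs = new java.util.ArrayList<>();
 *     for (int i = 0; i < cols.length; i++) {
 *       exprs.add(i < keySize ? cols[i] : "CAST(NULL AS ...)");  // keys pass through
 *     }
 *     return exprs;
 *   }
 */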
+ ExprNodeConstantDesc nullExtraColumnExpr = + new ExprNodeConstantDesc( + outputTypeInfo, + null); + nullExtraColumnExpr.setFoldedFromCol(outputColumnNames[i]); + selectExprList.add(nullExtraColumnExpr); + } + } + + SelectDesc selectDesc = new SelectDesc(selectExprList, selectOutputColumnNameList); + Operator selectOperator = + OperatorFactory.get(new CompilationOpContext(), selectDesc); + + return selectOperator; + } + + private static Operator vectorizeInterceptSelectOperator( + MapJoinOperator mapJoinOperator, int bigTableKeySize, int bigTableRetainSize, + Operator selectOperator) throws HiveException{ + + MapJoinDesc mapJoinDesc = (MapJoinDesc) mapJoinOperator.getConf(); + + VectorizationContext vOutContext = + ((VectorizationContextRegion) mapJoinOperator).getOutputVectorizationContext(); + + SelectDesc selectDesc = (SelectDesc) selectOperator.getConf(); + List selectExprs = selectDesc.getColList(); + + VectorExpression[] selectVectorExpr = new VectorExpression[bigTableRetainSize]; + for (int i = 0; i < bigTableRetainSize; i++) { + + TypeInfo typeInfo = selectExprs.get(i).getTypeInfo(); + if (i < bigTableKeySize) { + + // Big Table key. + selectVectorExpr[i] = vOutContext.getVectorExpression(selectExprs.get(i)); + } else { + + // For vector-mode, for test purposes we substitute a NO-OP (we don't want to modify + // the batch). + + // FULL OUTER INTERCEPT does not look at non-key columns. + + NoOpExpression noOpExpression = new NoOpExpression(i); + + noOpExpression.setInputTypeInfos(typeInfo); + noOpExpression.setInputDataTypePhysicalVariations(DataTypePhysicalVariation.NONE); + + noOpExpression.setOutputTypeInfo(typeInfo); + noOpExpression.setOutputDataTypePhysicalVariation(DataTypePhysicalVariation.NONE); + + selectVectorExpr[i] = noOpExpression; + } + } + + System.out.println("*BENCHMARK* VectorSelectOperator selectVectorExpr " + + Arrays.toString(selectVectorExpr)); + + int[] projectedColumns = + ArrayUtils.toPrimitive( + vOutContext.getProjectedColumns().subList(0, bigTableRetainSize). + toArray(new Integer[0])); + System.out.println("*BENCHMARK* VectorSelectOperator projectedColumns " + + Arrays.toString(projectedColumns)); + + VectorSelectDesc vectorSelectDesc = new VectorSelectDesc(); + vectorSelectDesc.setSelectExpressions(selectVectorExpr); + vectorSelectDesc.setProjectedOutputColumns(projectedColumns); + + Operator vectorSelectOperator = OperatorFactory.getVectorOperator( + selectOperator.getCompilationOpContext(), selectDesc, + vOutContext, vectorSelectDesc); + + return vectorSelectOperator; + } + + public static CountCollectorTestOperator addFullOuterIntercept( + MapJoinTestImplementation mapJoinImplementation, + MapJoinTestDescription testDesc, + RowTestObjectsMultiSet outputTestRowMultiSet, MapJoinTestData testData, + MapJoinOperator mapJoinOperator, MapJoinTableContainer mapJoinTableContainer, + MapJoinTableContainerSerDe mapJoinTableContainerSerDe) + throws SerDeException, IOException, HiveException { + + MapJoinDesc mapJoinDesc = (MapJoinDesc) mapJoinOperator.getConf(); + + // For FULL OUTER MapJoin, we require all Big Keys to be present in the output result. + // The first N output columns are the Big Table key columns. 
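/*
 * That positional contract can be stated as a small invariant check
 * (hypothetical helper, not part of this patch):
 *
 *   static void checkKeyPrefix(String[] outputColumnNames, String[] keyColumnNames) {
 *     for (int i = 0; i < keyColumnNames.length; i++) {
 *       if (!outputColumnNames[i].equals(keyColumnNames[i])) {
 *         throw new IllegalStateException("key column " + i + " not at output prefix");
 *       }
 *     }
 *   }
 */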
+ Map> keyMap = mapJoinDesc.getKeys(); + List bigTableKeyExprs = keyMap.get((byte) 0); + final int bigTableKeySize = bigTableKeyExprs.size(); + + Map> retainMap = mapJoinDesc.getRetainList(); + List bigTableRetainList = retainMap.get((byte) 0); + final int bigTableRetainSize = bigTableRetainList.size(); + + List outputColumnNameList = mapJoinDesc.getOutputColumnNames(); + String[] mapJoinOutputColumnNames = outputColumnNameList.toArray(new String[0]); + + // Use a utility method to get the MapJoin output TypeInfo. + TypeInfo[] mapJoinOutputTypeInfos = VectorMapJoinBaseOperator.getOutputTypeInfos(mapJoinDesc); + + final boolean isVectorOutput = MapJoinTestConfig.isVectorOutput(mapJoinImplementation); + + /* + * Always create a row-mode SelectOperator. If we are vector-mode, next we will use its + * expressions and replace it with a VectorSelectOperator. + */ + Operator selectOperator = + makeInterceptSelectOperator( + mapJoinOperator, bigTableKeySize, bigTableRetainSize, + mapJoinOutputColumnNames, mapJoinOutputTypeInfos); + + List selectOutputColumnNameList = + ((SelectDesc) selectOperator.getConf()).getOutputColumnNames(); + String[] selectOutputColumnNames = + selectOutputColumnNameList.toArray(new String[0]); + + if (isVectorOutput) { + selectOperator = + vectorizeInterceptSelectOperator( + mapJoinOperator, bigTableKeySize, bigTableRetainSize, selectOperator); + } + + /* + * Create test description just for FULL OUTER INTERCEPT with different + */ + MapJoinTestDescription interceptTestDesc = + new MapJoinTestDescription( + testDesc.hiveConf, testDesc.vectorMapJoinVariation, + selectOutputColumnNames, + Arrays.copyOf(mapJoinOutputTypeInfos, bigTableRetainSize), + testDesc.bigTableKeyColumnNums, + testDesc.smallTableValueTypeInfos, + testDesc.smallTableRetainKeyColumnNums, + testDesc.smallTableGenerationParameters, + testDesc.mapJoinPlanVariation); + + MapJoinDesc intersectMapJoinDesc = + createMapJoinDesc(interceptTestDesc, /* isFullOuterIntersect */ true); + + /* + * Create FULL OUTER INTERSECT MapJoin operator. + */ + CreateMapJoinResult interceptCreateMapJoinResult = + createMapJoinImplementation( + mapJoinImplementation, interceptTestDesc, testData, intersectMapJoinDesc); + MapJoinOperator intersectMapJoinOperator = + interceptCreateMapJoinResult.mapJoinOperator; + MapJoinTableContainer intersectMapJoinTableContainer = + interceptCreateMapJoinResult.mapJoinTableContainer; + MapJoinTableContainerSerDe interceptMapJoinTableContainerSerDe = + interceptCreateMapJoinResult.mapJoinTableContainerSerDe; + + connectOperators(mapJoinOperator, selectOperator); + + connectOperators(selectOperator, intersectMapJoinOperator); + + CountCollectorTestOperator interceptTestCollectorOperator; + if (!isVectorOutput) { + interceptTestCollectorOperator = + new TestMultiSetCollectorOperator( + interceptTestDesc.outputObjectInspectors, outputTestRowMultiSet); + } else { + VectorizationContext vContext = + ((VectorizationContextRegion) intersectMapJoinOperator).getOutputVectorizationContext(); + int[] intersectProjectionColumns = + ArrayUtils.toPrimitive(vContext.getProjectedColumns().toArray(new Integer[0])); + interceptTestCollectorOperator = + new TestMultiSetVectorCollectorOperator( + intersectProjectionColumns, + interceptTestDesc.outputTypeInfos, + interceptTestDesc.outputObjectInspectors, outputTestRowMultiSet); + } + + connectOperators(intersectMapJoinOperator, interceptTestCollectorOperator); + + // Setup the FULL OUTER INTERSECT MapJoin's inputObjInspector to include the Small Table, etc. 
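/*
 * The initialization protocol below, spelled out: because
 * hive.mapjoin.testing.no.hash.table.load was set to true when the operators
 * were created, initialize() skips the normal hash-table load; the test then
 * injects the prebuilt containers by hand. The first argument to
 * setTestMapJoinTableContainer() is the small-table position (1), matching
 * inputObjInspectors[1]. Condensed shape (hedged, mirroring the calls below):
 *
 *   op.setInputObjInspectors(inspectors);     // [0] = Big Table, [1] = Small Table
 *   rootOp.initialize(hiveConf, inspectors);  // runs initializeOp() down the tree
 *   op.setTestMapJoinTableContainer(1, container, containerSerDe);
 */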
+ intersectMapJoinOperator.setInputObjInspectors(interceptTestDesc.inputObjectInspectors); + + // Now, invoke initializeOp methods from the root MapJoin operator. + mapJoinOperator.initialize(testDesc.hiveConf, testDesc.inputObjectInspectors); + + // Fixup the mapJoinTables container references to our test data. + mapJoinOperator.setTestMapJoinTableContainer( + 1, mapJoinTableContainer, mapJoinTableContainerSerDe); + intersectMapJoinOperator.setTestMapJoinTableContainer( + 1, intersectMapJoinTableContainer, interceptMapJoinTableContainerSerDe); + + return interceptTestCollectorOperator; + } + + private static List> newOperatorList() { + return new ArrayList>(); } } \ No newline at end of file diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestData.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestData.java index d763695..4994e9e 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestData.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestData.java @@ -20,6 +20,7 @@ import java.util.ArrayList; import java.util.HashMap; +import java.util.List; import java.util.Random; import java.util.Map.Entry; @@ -37,6 +38,7 @@ import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.SmallTableGenerationParameters; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.SmallTableGenerationParameters.ValueOption; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.VectorMapJoinVariation; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; @@ -58,6 +60,8 @@ HashMap smallTableKeyHashMap; + List fullOuterAdditionalSmallTableKeys; + ArrayList smallTableValueCounts; ArrayList> smallTableValues; @@ -68,83 +72,128 @@ public MapJoinTestData(int rowCount, MapJoinTestDescription testDesc, this.smallTableRandomSeed = smallTableRandomSeed; - generateTypes = generateTypesFromTypeInfos(testDesc.bigTableTypeInfos); + boolean isOuterJoin = + (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.OUTER || + testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER); + + generateTypes = generateTypesFromTypeInfos( + testDesc.bigTableTypeInfos, + testDesc.bigTableKeyColumnNums.length, + isOuterJoin); generator = new VectorBatchGenerator(generateTypes); bigTableBatch = generator.createBatch(); // Add small table result columns. - ColumnVector[] newCols = new ColumnVector[bigTableBatch.cols.length + testDesc.smallTableValueTypeInfos.length]; + + // Only [FULL] OUTER MapJoin needs a physical column. + final int smallTableRetainKeySize = + (isOuterJoin ? 
testDesc.smallTableRetainKeyColumnNums.length : 0); + ColumnVector[] newCols = + new ColumnVector[ + bigTableBatch.cols.length + + smallTableRetainKeySize + + testDesc.smallTableValueTypeInfos.length]; System.arraycopy(bigTableBatch.cols, 0, newCols, 0, bigTableBatch.cols.length); + int colIndex = bigTableBatch.cols.length; + + if (isOuterJoin) { + for (int s = 0; s < smallTableRetainKeySize; s++) { + final int smallTableKeyColumnNum = testDesc.smallTableRetainKeyColumnNums[s]; + newCols[colIndex++] = + VectorizedBatchUtil.createColumnVector( + testDesc.smallTableKeyTypeInfos[smallTableKeyColumnNum]); + } + } for (int s = 0; s < testDesc.smallTableValueTypeInfos.length; s++) { - newCols[bigTableBatch.cols.length + s] = + newCols[colIndex++] = VectorizedBatchUtil.createColumnVector(testDesc.smallTableValueTypeInfos[s]); } bigTableBatch.cols = newCols; bigTableBatch.numCols = newCols.length; - + // This stream will be restarted with the same random seed over and over. bigTableBatchStream = new VectorBatchGenerateStream( bigTableRandomSeed, generator, rowCount); - VectorExtractRow vectorExtractRow = new VectorExtractRow(); - vectorExtractRow.init(testDesc.bigTableKeyTypeInfos); + VectorExtractRow keyVectorExtractRow = new VectorExtractRow(); + keyVectorExtractRow.init(testDesc.bigTableKeyTypeInfos, testDesc.bigTableKeyColumnNums); smallTableGenerationParameters = testDesc.getSmallTableGenerationParameters(); + HashMap bigTableKeyHashMap = new HashMap(); smallTableKeyHashMap = new HashMap(); + Random smallTableRandom = new Random(smallTableRandomSeed); // Start small table random generation // from beginning. ValueOption valueOption = smallTableGenerationParameters.getValueOption(); - int keyOutOfAThousand = smallTableGenerationParameters.getKeyOutOfAThousand(); + if (valueOption != ValueOption.NO_REGULAR_SMALL_KEYS) { + int keyOutOfAThousand = smallTableGenerationParameters.getKeyOutOfAThousand(); - bigTableBatchStream.reset(); - while (bigTableBatchStream.isNext()) { - bigTableBatch.reset(); - bigTableBatchStream.fillNext(bigTableBatch); + bigTableBatchStream.reset(); + while (bigTableBatchStream.isNext()) { + bigTableBatch.reset(); + bigTableBatchStream.fillNext(bigTableBatch); - final int size = bigTableBatch.size; - for (int i = 0; i < size; i++) { - - if (smallTableRandom.nextInt(1000) <= keyOutOfAThousand) { + final int size = bigTableBatch.size; + for (int i = 0; i < size; i++) { - RowTestObjects testKey = getTestKey(bigTableBatch, i, vectorExtractRow, + RowTestObjects testKey = getTestKey(bigTableBatch, i, keyVectorExtractRow, testDesc.bigTableKeyTypeInfos.length, testDesc.bigTableObjectInspectors); + bigTableKeyHashMap.put((RowTestObjects) testKey.clone(), -1); + + if (smallTableRandom.nextInt(1000) <= keyOutOfAThousand) { - if (valueOption == ValueOption.ONLY_ONE) { - if (smallTableKeyHashMap.containsKey(testKey)) { - continue; + if (valueOption == ValueOption.ONLY_ONE) { + if (smallTableKeyHashMap.containsKey(testKey)) { + continue; + } } + smallTableKeyHashMap.put((RowTestObjects) testKey.clone(), -1); } - smallTableKeyHashMap.put((RowTestObjects) testKey.clone(), -1); } } } //--------------------------------------------------------------------------------------------- - // UNDONE: For now, don't add more small keys... - /* - // Add more small table keys that are not in Big Table batches. 
- final int smallTableAdditionalLength = 1 + random.nextInt(4); - final int smallTableAdditionalSize = smallTableAdditionalLength * maxBatchSize; - VectorizedRowBatch[] smallTableAdditionalBatches = createBigTableBatches(generator, smallTableAdditionalLength); - for (int i = 0; i < smallTableAdditionalLength; i++) { - generator.generateBatch(smallTableAdditionalBatches[i], random, maxBatchSize); + // Add more small table keys that are not in Big Table or Small Table for FULL OUTER. + + fullOuterAdditionalSmallTableKeys = new ArrayList(); + + VectorBatchGenerateStream altBigTableBatchStream = + new VectorBatchGenerateStream( + smallTableRandomSeed, generator, 100); + altBigTableBatchStream.reset(); + while (altBigTableBatchStream.isNext()) { + bigTableBatch.reset(); + altBigTableBatchStream.fillNext(bigTableBatch); + final int size = bigTableBatch.size; + for (int i = 0; i < size; i++) { + RowTestObjects testKey = getTestKey(bigTableBatch, i, keyVectorExtractRow, + testDesc.bigTableKeyTypeInfos.length, + testDesc.bigTableObjectInspectors); + if (bigTableKeyHashMap.containsKey(testKey) || + smallTableKeyHashMap.containsKey(testKey)) { + continue; + } + RowTestObjects testKeyClone = (RowTestObjects) testKey.clone(); + smallTableKeyHashMap.put(testKeyClone, -1); + fullOuterAdditionalSmallTableKeys.add(testKeyClone); + } } - TestRow[] additionalTestKeys = getTestKeys(smallTableAdditionalBatches, vectorExtractRow, - testDesc.bigTableKeyTypeInfos.length, testDesc.bigTableObjectInspectors); - final int smallTableAdditionKeyProbes = smallTableAdditionalSize / 2; - for (int i = 0; i < smallTableAdditionKeyProbes; i++) { - int index = random.nextInt(smallTableAdditionalSize); - TestRow additionalTestKey = additionalTestKeys[index]; - smallTableKeyHashMap.put((TestRow) additionalTestKey.clone(), -1); + + // Make sure there is a NULL key. + Object[] nullKeyRowObjects = new Object[testDesc.bigTableKeyTypeInfos.length]; + RowTestObjects nullTestKey = new RowTestObjects(nullKeyRowObjects); + if (!smallTableKeyHashMap.containsKey(nullTestKey)) { + smallTableKeyHashMap.put(nullTestKey, -1); + fullOuterAdditionalSmallTableKeys.add(nullTestKey); } - */ // Number the test rows with collection order. int addCount = 0; @@ -177,9 +226,9 @@ public static void driveBigTableData(MapJoinTestDescription testDesc, MapJoinTes MapJoinOperator operator) throws HiveException { VectorExtractRow vectorExtractRow = new VectorExtractRow(); - vectorExtractRow.init(testDesc.bigTableKeyTypeInfos); + vectorExtractRow.init(testDesc.bigTableTypeInfos); - final int columnCount = testDesc.bigTableKeyTypeInfos.length; + final int columnCount = testDesc.bigTableTypeInfos.length; Object[] row = new Object[columnCount]; testData.bigTableBatchStream.reset(); @@ -194,7 +243,9 @@ public static void driveBigTableData(MapJoinTestDescription testDesc, MapJoinTes operator.process(row, 0); } } - operator.closeOp(false); + + // Close the operator tree. + operator.close(false); } public static void driveVectorBigTableData(MapJoinTestDescription testDesc, MapJoinTestData testData, @@ -207,7 +258,9 @@ public static void driveVectorBigTableData(MapJoinTestDescription testDesc, MapJ operator.process(testData.bigTableBatch, 0); } - operator.closeOp(false); + + // Close the operator tree. 
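/*
 * close(false), which replaces the old closeOp(false) call, matters now that
 * the operator under test has children: Operator.close() runs the operator's
 * own cleanup and then recursively closes its child operators, so a single
 * call at the root tears down the whole
 * mapJoin -> select -> intersect mapJoin -> collector chain. Minimal
 * drive-loop shape these tests use (types elided, hedged):
 *
 *   for (Object[] row : bigTableRows) {
 *     rootOperator.process(row, 0);   // tag 0 = Big Table input
 *   }
 *   rootOperator.close(false);        // false = not aborted; cascades to children
 */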
+ operator.close(false); } public static void generateVariationData(MapJoinTestData testData, @@ -219,6 +272,7 @@ public static void generateVariationData(MapJoinTestData testData, break; case INNER: case OUTER: + case FULL_OUTER: testData.generateRandomSmallTableCounts(testDesc, random); testData.generateRandomSmallTableValues(testDesc, random); break; @@ -230,10 +284,15 @@ public static void generateVariationData(MapJoinTestData testData, private static RowTestObjects generateRandomSmallTableValueRow(MapJoinTestDescription testDesc, Random random) { final int columnCount = testDesc.smallTableValueTypeInfos.length; - Object[] smallTableValueRow = VectorRandomRowSource.randomWritablePrimitiveRow(columnCount, random, - testDesc.smallTableValuePrimitiveTypeInfos); + PrimitiveTypeInfo[] primitiveTypeInfos = new PrimitiveTypeInfo[columnCount]; + for (int i = 0; i < columnCount; i++) { + primitiveTypeInfos[i] = (PrimitiveTypeInfo) testDesc.smallTableValueTypeInfos[i]; + } + Object[] smallTableValueRow = + VectorRandomRowSource.randomWritablePrimitiveRow( + columnCount, random, primitiveTypeInfos); for (int c = 0; c < smallTableValueRow.length; c++) { - smallTableValueRow[c] = ((PrimitiveObjectInspector) testDesc.smallTableObjectInspectors[c]).copyObject(smallTableValueRow[c]); + smallTableValueRow[c] = ((PrimitiveObjectInspector) testDesc.smallTableValueObjectInspectors[c]).copyObject(smallTableValueRow[c]); } return new RowTestObjects(smallTableValueRow); } @@ -241,7 +300,7 @@ private static RowTestObjects generateRandomSmallTableValueRow(MapJoinTestDescri private void generateRandomSmallTableCounts(MapJoinTestDescription testDesc, Random random) { smallTableValueCounts = new ArrayList(); for (Entry testKeyEntry : smallTableKeyHashMap.entrySet()) { - final int valueCount = 1 + random.nextInt(19); + final int valueCount = 1 + random.nextInt(3); smallTableValueCounts.add(valueCount); } } @@ -258,14 +317,26 @@ private void generateRandomSmallTableValues(MapJoinTestDescription testDesc, Ran } } - private static GenerateType[] generateTypesFromTypeInfos(TypeInfo[] typeInfos) { + private static GenerateType[] generateTypesFromTypeInfos(TypeInfo[] typeInfos, + int keyCount, boolean isOuterJoin) { final int size = typeInfos.length; GenerateType[] generateTypes = new GenerateType[size]; for (int i = 0; i < size; i++) { PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfos[i]; GenerateCategory category = - GenerateCategory.generateCategoryFromPrimitiveCategory(primitiveTypeInfo.getPrimitiveCategory()); - generateTypes[i] = new GenerateType(category); + GenerateCategory.generateCategoryFromPrimitiveCategory( + primitiveTypeInfo.getPrimitiveCategory()); + final boolean allowNulls; + if (i >= keyCount) { + + // Value columns can be NULL. + allowNulls = true; + } else { + + // Non-OUTER JOIN operators expect NULL keys to have been filtered out. 
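/*
 * The resulting truth table, condensed into a hypothetical helper equivalent
 * to the branches here:
 *
 *   // value column (i >= keyCount)                    -> NULLs allowed
 *   // key column, INNER / INNER_BIG_ONLY / LEFT_SEMI  -> NULLs suppressed
 *   // key column, OUTER / FULL_OUTER                  -> NULLs allowed
 *   static boolean allowNulls(int i, int keyCount, boolean isOuterJoin) {
 *     return i >= keyCount || isOuterJoin;
 *   }
 */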
+ allowNulls = isOuterJoin; + } + generateTypes[i] = new GenerateType(category, allowNulls); } return generateTypes; } diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestDescription.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestDescription.java index bde4424..7115892 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestDescription.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/MapJoinTestDescription.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hive.ql.exec.vector.mapjoin; +import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -28,19 +29,24 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.AbstractPrimitiveWritableObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; public class MapJoinTestDescription extends DescriptionTest { + public static enum MapJoinPlanVariation { + DYNAMIC_PARTITION_HASH_JOIN, + SHARED_SMALL_TABLE + } + public static class SmallTableGenerationParameters { public static enum ValueOption { NO_RESTRICTION, ONLY_ONE, - ONLY_TWO, - AT_LEAST_TWO + NO_REGULAR_SMALL_KEYS } private ValueOption valueOption; @@ -82,70 +88,103 @@ public int getNoMatchKeyOutOfAThousand() { final VectorMapJoinVariation vectorMapJoinVariation; // Adjustable. - public String[] bigTableColumnNames; + public String[] bigTableKeyColumnNames; public TypeInfo[] bigTableTypeInfos; + public int[] bigTableKeyColumnNums; - public String[] smallTableValueColumnNames; + public TypeInfo[] smallTableValueTypeInfos; - public int[] bigTableRetainColumnNums; + public int[] smallTableRetainKeyColumnNums; - public int[] smallTableRetainValueColumnNums; public SmallTableGenerationParameters smallTableGenerationParameters; // Derived. 
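/*
 * The derived fields below are all computed in computeDerived() from the
 * adjustable inputs. Column names follow the planner's positional "_colN"
 * convention, with Small Table value names continuing the numbering after the
 * keys; a hedged sketch of that convention (hypothetical helper):
 *
 *   static String[] autoColumnNames(int startAt, int count) {
 *     String[] names = new String[count];
 *     for (int i = 0; i < count; i++) {
 *       names[i] = "_col" + (startAt + i);
 *     }
 *     return names;   // e.g. autoColumnNames(keySize, valueSize) for value columns
 *   }
 */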
- public List bigTableColumnNamesList; - public String[] bigTableKeyColumnNames; - public TypeInfo[] bigTableKeyTypeInfos; - public List smallTableValueColumnNamesList; + + public int[] bigTableColumnNums; + public String[] bigTableColumnNames; + public List bigTableColumnNameList; public ObjectInspector[] bigTableObjectInspectors; - public List bigTableObjectInspectorsList; + public List bigTableObjectInspectorList; + + public TypeInfo[] bigTableKeyTypeInfos; + + public List smallTableKeyColumnNameList; + public String[] smallTableKeyColumnNames; + public TypeInfo[] smallTableKeyTypeInfos; + public ObjectInspector[] smallTableKeyObjectInspectors; + public List smallTableKeyObjectInspectorList; + + public List smallTableValueColumnNameList; + public String[] smallTableValueColumnNames; + public ObjectInspector[] smallTableValueObjectInspectors; + public List smallTableValueObjectInspectorList; + + public int[] bigTableRetainColumnNums; + public int[] smallTableRetainValueColumnNums; + + public String[] smallTableColumnNames; + public List smallTableColumnNameList; + public TypeInfo[] smallTableTypeInfos; + public List smallTableObjectInspectorList; + public StandardStructObjectInspector bigTableStandardObjectInspector; - public PrimitiveTypeInfo[] smallTableValuePrimitiveTypeInfos; - public ObjectInspector[] smallTableObjectInspectors; - public PrimitiveCategory[] smallTablePrimitiveCategories; - public List smallTableObjectInspectorsList; public StandardStructObjectInspector smallTableStandardObjectInspector; public ObjectInspector[] inputObjectInspectors; + public String[] outputColumnNames; public TypeInfo[] outputTypeInfos; public ObjectInspector[] outputObjectInspectors; + final MapJoinPlanVariation mapJoinPlanVariation; + + public MapJoinTestDescription ( + HiveConf hiveConf, + VectorMapJoinVariation vectorMapJoinVariation, + TypeInfo[] bigTableTypeInfos, + int[] bigTableKeyColumnNums, + TypeInfo[] smallTableValueTypeInfos, + int[] smallTableRetainKeyColumnNums, + SmallTableGenerationParameters smallTableGenerationParameters, + MapJoinPlanVariation mapJoinPlanVariation) { + this( + hiveConf, + vectorMapJoinVariation, + /* bigTableColumnNames */ null, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + } + public MapJoinTestDescription ( HiveConf hiveConf, VectorMapJoinVariation vectorMapJoinVariation, - String[] bigTableColumnNames, TypeInfo[] bigTableTypeInfos, + String[] bigTableColumnNames, + TypeInfo[] bigTableTypeInfos, int[] bigTableKeyColumnNums, - String[] smallTableValueColumnNames, TypeInfo[] smallTableValueTypeInfos, - int[] bigTableRetainColumnNums, - int[] smallTableRetainKeyColumnNums, int[] smallTableRetainValueColumnNums, - SmallTableGenerationParameters smallTableGenerationParameters) { + TypeInfo[] smallTableValueTypeInfos, + int[] smallTableRetainKeyColumnNums, + SmallTableGenerationParameters smallTableGenerationParameters, + MapJoinPlanVariation mapJoinPlanVariation) { super(hiveConf); + this.vectorMapJoinVariation = vectorMapJoinVariation; this.bigTableColumnNames = bigTableColumnNames; this.bigTableTypeInfos = bigTableTypeInfos; this.bigTableKeyColumnNums = bigTableKeyColumnNums; - this.smallTableValueColumnNames = smallTableValueColumnNames; + this.smallTableValueTypeInfos = smallTableValueTypeInfos; - this.bigTableRetainColumnNums = bigTableRetainColumnNums; - this.smallTableRetainKeyColumnNums = smallTableRetainKeyColumnNums; - 
this.smallTableRetainValueColumnNums = smallTableRetainValueColumnNums; + + this.smallTableRetainKeyColumnNums = smallTableRetainKeyColumnNums; this.smallTableGenerationParameters = smallTableGenerationParameters; - switch (vectorMapJoinVariation) { - case INNER_BIG_ONLY: - case LEFT_SEMI: - trimAwaySmallTableValueInfo(); - break; - case INNER: - case OUTER: - break; - default: - throw new RuntimeException("Unknown operator variation " + vectorMapJoinVariation); - } + this.mapJoinPlanVariation = mapJoinPlanVariation; computeDerived(); } @@ -155,45 +194,121 @@ public SmallTableGenerationParameters getSmallTableGenerationParameters() { } public void computeDerived() { - bigTableColumnNamesList = Arrays.asList(bigTableColumnNames); - bigTableKeyColumnNames = new String[bigTableKeyColumnNums.length]; - bigTableKeyTypeInfos = new TypeInfo[bigTableKeyColumnNums.length]; - for (int i = 0; i < bigTableKeyColumnNums.length; i++) { - bigTableKeyColumnNames[i] = bigTableColumnNames[bigTableKeyColumnNums[i]]; - bigTableKeyTypeInfos[i] = bigTableTypeInfos[bigTableKeyColumnNums[i]]; + final int bigTableSize = bigTableTypeInfos.length; + + if (bigTableColumnNames == null) { + + // Automatically populate. + bigTableColumnNames = new String[bigTableSize]; + for (int i = 0; i < bigTableSize; i++) { + bigTableColumnNames[i] = "_col" + i; + } } - smallTableValueColumnNamesList = Arrays.asList(smallTableValueColumnNames); + // Automatically populate. + bigTableColumnNums = new int[bigTableSize]; - bigTableObjectInspectors = new ObjectInspector[bigTableTypeInfos.length]; - for (int i = 0; i < bigTableTypeInfos.length; i++) { + for (int i = 0; i < bigTableSize; i++) { + bigTableColumnNums[i] = i; + } + + // Automatically populate. + bigTableRetainColumnNums = new int[bigTableSize]; + for (int i = 0; i < bigTableSize; i++) { + bigTableRetainColumnNums[i] = i; + } + + /* + * Big Table key information. + */ + final int keySize = bigTableKeyColumnNums.length; + + bigTableKeyColumnNames = new String[keySize]; + bigTableKeyTypeInfos = new TypeInfo[keySize]; + for (int i = 0; i < keySize; i++) { + final int bigTableKeyColumnNum = bigTableKeyColumnNums[i]; + bigTableKeyColumnNames[i] = bigTableColumnNames[bigTableKeyColumnNum]; + bigTableKeyTypeInfos[i] = bigTableTypeInfos[bigTableKeyColumnNum]; + } + + /* + * Big Table object inspectors. + */ + bigTableObjectInspectors = new ObjectInspector[bigTableSize]; + for (int i = 0; i < bigTableSize; i++) { bigTableObjectInspectors[i] = - PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector((PrimitiveTypeInfo) bigTableTypeInfos[i]); + PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( + (PrimitiveTypeInfo) bigTableTypeInfos[i]); + } + bigTableColumnNameList = Arrays.asList(bigTableColumnNames); + bigTableObjectInspectorList = Arrays.asList(bigTableObjectInspectors); + + /* + * Small Table key object inspectors are derived directly from the Big Table key information.
+ */ + smallTableKeyColumnNames = new String[keySize]; + smallTableKeyTypeInfos = Arrays.copyOf(bigTableKeyTypeInfos, keySize); + smallTableKeyObjectInspectors = new ObjectInspector[keySize]; + for (int i = 0; i < keySize; i++) { + smallTableKeyColumnNames[i] = "_col" + i; + final int bigTableKeyColumnNum = bigTableKeyColumnNums[i]; + smallTableKeyObjectInspectors[i] = bigTableObjectInspectors[bigTableKeyColumnNum]; + } + smallTableKeyColumnNameList = Arrays.asList(smallTableKeyColumnNames); + smallTableKeyObjectInspectorList = Arrays.asList(smallTableKeyObjectInspectors); + + // First part of Small Table information is the key information. + smallTableColumnNameList = new ArrayList(smallTableKeyColumnNameList); + List smallTableTypeInfoList = + new ArrayList(Arrays.asList(smallTableKeyTypeInfos)); + smallTableObjectInspectorList = new ArrayList(); + smallTableObjectInspectorList.addAll(smallTableKeyObjectInspectorList); + + final int valueSize = smallTableValueTypeInfos.length; + + // Automatically populate. + smallTableValueColumnNames = new String[valueSize]; + for (int i = 0; i < valueSize; i++) { + smallTableValueColumnNames[i] = "_col" + (keySize + i); } - bigTableObjectInspectorsList = Arrays.asList(bigTableObjectInspectors); - smallTableObjectInspectors = new ObjectInspector[smallTableValueTypeInfos.length]; - smallTablePrimitiveCategories = new PrimitiveCategory[smallTableValueTypeInfos.length]; - smallTableValuePrimitiveTypeInfos = new PrimitiveTypeInfo[smallTableValueTypeInfos.length]; - for (int i = 0; i < smallTableValueTypeInfos.length; i++) { - PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) smallTableValueTypeInfos[i]; - smallTableObjectInspectors[i] = - PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector(primitiveTypeInfo); - smallTablePrimitiveCategories[i] = primitiveTypeInfo.getPrimitiveCategory(); - smallTableValuePrimitiveTypeInfos[i] = primitiveTypeInfo; + smallTableValueObjectInspectors = new ObjectInspector[valueSize]; + for (int i = 0; i < valueSize; i++) { + smallTableValueObjectInspectors[i] = + PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector( + (PrimitiveTypeInfo) smallTableValueTypeInfos[i]); } - smallTableObjectInspectorsList = Arrays.asList(smallTableObjectInspectors); + smallTableValueColumnNameList = Arrays.asList(smallTableValueColumnNames); + smallTableTypeInfoList.addAll(Arrays.asList(smallTableValueTypeInfos)); + smallTableValueObjectInspectorList = Arrays.asList(smallTableValueObjectInspectors); + smallTableColumnNameList.addAll(smallTableValueColumnNameList); + smallTableColumnNames = smallTableColumnNameList.toArray(new String[0]); + smallTableTypeInfos = smallTableTypeInfoList.toArray(new TypeInfo[0]); + + smallTableObjectInspectorList.addAll(smallTableValueObjectInspectorList); + + /* + * The inputObjectInspectors describe the keys and values of the Big Table and Small Table. 
+ */ bigTableStandardObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector( - bigTableColumnNamesList, Arrays.asList((ObjectInspector[]) bigTableObjectInspectors)); + bigTableColumnNameList, bigTableObjectInspectorList); smallTableStandardObjectInspector = ObjectInspectorFactory.getStandardStructObjectInspector( - smallTableValueColumnNamesList, Arrays.asList((ObjectInspector[]) smallTableObjectInspectors)); + smallTableColumnNameList, smallTableObjectInspectorList); inputObjectInspectors = - new ObjectInspector[] { bigTableStandardObjectInspector, smallTableStandardObjectInspector }; + new ObjectInspector[] { + bigTableStandardObjectInspector, smallTableStandardObjectInspector }; + + // For now, we always retain the Small Table values... + // Automatically populate. + smallTableRetainValueColumnNums = new int[valueSize]; + for (int i = 0; i < valueSize; i++) { + smallTableRetainValueColumnNums[i] = i; + } int outputLength = bigTableRetainColumnNums.length + @@ -203,12 +318,13 @@ public void computeDerived() { outputTypeInfos = new TypeInfo[outputLength]; int outputIndex = 0; - for (int i = 0; i < bigTableRetainColumnNums.length; i++) { + final int bigTableRetainSize = bigTableRetainColumnNums.length; + for (int i = 0; i < bigTableRetainSize; i++) { outputTypeInfos[outputIndex++] = bigTableTypeInfos[bigTableRetainColumnNums[i]]; } - // for (int i = 0; i < smallTableRetainKeyColumnNums.length; i++) { - // outputTypeInfos[outputIndex++] = smallTableTypeInfos[smallTableRetainKeyColumnNums[i]]; - // } + for (int i = 0; i < smallTableRetainKeyColumnNums.length; i++) { + outputTypeInfos[outputIndex++] = smallTableKeyTypeInfos[smallTableRetainKeyColumnNums[i]]; + } for (int i = 0; i < smallTableRetainValueColumnNums.length; i++) { outputTypeInfos[outputIndex++] = smallTableValueTypeInfos[smallTableRetainValueColumnNums[i]]; } @@ -221,13 +337,6 @@ public void computeDerived() { } } - public void trimAwaySmallTableValueInfo() { - smallTableValueColumnNames = new String[] {}; - smallTableValueTypeInfos = new TypeInfo[] {}; - smallTableRetainKeyColumnNums = new int[] {}; - smallTableRetainValueColumnNums = new int[] {}; - } - private String[] createOutputColumnNames(int outputColumnCount) { String[] outputColumnNames = new String[outputColumnCount]; int counter = 1; diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/NoOpExpression.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/NoOpExpression.java new file mode 100644 index 0000000..fdd0342 --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/NoOpExpression.java @@ -0,0 +1,52 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.exec.vector.mapjoin; + +import org.apache.hadoop.hive.ql.exec.vector.VectorExpressionDescriptor; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpression; + +/** + * A test-only no-op expression: evaluate() intentionally leaves the batch unchanged. + */ +public class NoOpExpression extends VectorExpression { + + private static final long serialVersionUID = 1L; + + public NoOpExpression() { + } + + public NoOpExpression(int colNum) { + super(colNum); + } + + @Override + public void evaluate(VectorizedRowBatch batch) { + // Intentionally empty: the batch passes through unmodified. + } + + @Override + public String vectorExpressionParameters() { + return "noOpCol" + outputColumnNum + ":" + + getTypeName(outputTypeInfo, outputDataTypePhysicalVariation); + } + + @Override + public VectorExpressionDescriptor.Descriptor getDescriptor() { + return (new VectorExpressionDescriptor.Builder()).build(); + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java index 4c41f9c..f70e641 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/TestMapJoinOperator.java @@ -21,7 +21,9 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; +import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang.ArrayUtils; +import org.apache.hadoop.hive.common.type.HiveChar; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.CompilationOpContext; import org.apache.hadoop.hive.ql.exec.MapJoinOperator; @@ -50,6 +52,7 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorMapJoinOuterFilteredOperator; import org.apache.hadoop.hive.ql.exec.vector.VectorRandomRowSource; import org.apache.hadoop.hive.ql.exec.vector.VectorizationContext; +import org.apache.hadoop.hive.ql.exec.vector.VectorizationContextRegion; import org.apache.hadoop.hive.ql.exec.vector.VectorizedBatchUtil; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; @@ -57,8 +60,13 @@ import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator; import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType; import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType.GenerateCategory; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.CreateMapJoinResult; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.MapJoinTestImplementation; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.TestMultiSetCollectorOperator; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestConfig.TestMultiSetVectorCollectorOperator; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.MapJoinPlanVariation; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.SmallTableGenerationParameters; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.MapJoinTestDescription.SmallTableGenerationParameters.ValueOption; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastMultiKeyHashMap; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastTableContainer; import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VerifyFastRow; @@
-93,14 +101,18 @@ import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.typeinfo.CharTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.DecimalTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.Writable; import org.apache.hive.common.util.HashCodeUtil; import org.apache.hive.common.util.ReflectionUtil; import org.junit.Test; +import org.junit.Ignore; import java.io.IOException; import java.util.ArrayList; @@ -120,233 +132,1350 @@ public class TestMapJoinOperator { - /* - * This test collector operator is for MapJoin row-mode. - */ - private class TestMultiSetCollectorOperator extends RowCollectorTestOperator { + private boolean addLongHiveConfVariation(int hiveConfVariation, HiveConf hiveConf) { + + // Set defaults. + HiveConf.setBoolVar( + hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_MINMAX_ENABLED, false); + HiveConf.setIntVar( + hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_OVERFLOW_REPEATED_THRESHOLD, -1); + + switch (hiveConfVariation) { + case 0: + break; + case 1: + HiveConf.setBoolVar( + hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_MINMAX_ENABLED, true); + break; + case 2: + // Force generateHashMapResultLargeMultiValue to be used. + HiveConf.setIntVar( + hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_OVERFLOW_REPEATED_THRESHOLD, 5); + break; + default: + return false; + } + return true; + } + + private boolean goodTestVariation(MapJoinTestDescription testDesc) { + final int smallTableValueSize = testDesc.smallTableRetainValueColumnNums.length; + + switch (testDesc.vectorMapJoinVariation) { + case INNER: + return (smallTableValueSize > 0); + case INNER_BIG_ONLY: + case LEFT_SEMI: + return (smallTableValueSize == 0); + case OUTER: + return true; + case FULL_OUTER: + return true; + default: + throw new RuntimeException( + "Unexpected vectorMapJoinVariation " + testDesc.vectorMapJoinVariation); + } + + } + + @Test + @Ignore + public void testLong0() throws Exception { + long seed = 234882L; + int rowCount = 10; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestLong0( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER) { + doTestLong0( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.SHARED_SMALL_TABLE); + } + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + private boolean doTestLong0(long seed, int rowCount, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { + + HiveConf hiveConf = new HiveConf(); + + if (!addLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] 
bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Big Table: long key, no value; Small Table: no key retained, date value + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.longTypeInfo}; + + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {}; + + smallTableValueTypeInfos = + new TypeInfo[] {TypeInfoFactory.dateTypeInfo}; + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } + + testData = + new MapJoinTestData(rowCount, testDesc, seed, seed * 10); + + executeTest(testDesc, testData, "testLong0"); + + return false; + } + + @Test + @Ignore + public void testLong0_NoRegularKeys() throws Exception { + long seed = 234882L; + int rowCount = 10; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestLong0_NoRegularKeys( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER) { + doTestLong0_NoRegularKeys( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.SHARED_SMALL_TABLE); + } + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + private boolean doTestLong0_NoRegularKeys(long seed, int rowCount, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { + + HiveConf hiveConf = new HiveConf(); + + if (!addLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + smallTableGenerationParameters.setValueOption(ValueOption.NO_REGULAR_SMALL_KEYS); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Big Table: long key, no value; Small Table: no key retained, date value + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.longTypeInfo}; + + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {}; + + smallTableValueTypeInfos = + new TypeInfo[] {TypeInfoFactory.dateTypeInfo}; + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } + + testData = + new MapJoinTestData(rowCount, testDesc, seed, seed * 10); + + executeTest(testDesc, testData, "doTestLong0_NoRegularKeys"); + + return false; + } + + @Test + @Ignore + public void testLong1() throws Exception { + long seed = 234882L; + int rowCount = 10; + + int hiveConfVariation = 
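ValueOption.NO_REGULAR_SMALL_KEYS (set just above) asks the data generator for Small Table keys that never collide with Big Table keys, so a FULL OUTER join yields only non-match results from the small side. A hedged sketch of that generation constraint; MapJoinTestData's real generator is not part of this patch, so the rejection-sampling approach here is an assumption:

```java
import java.util.HashSet;
import java.util.Random;
import java.util.Set;

public class DisjointKeySketch {
  // Generate small-table keys that never appear among the big-table keys:
  // the assumed effect of ValueOption.NO_REGULAR_SMALL_KEYS.
  static Set<Long> disjointSmallKeys(Set<Long> bigKeys, int count, long seed) {
    Random random = new Random(seed);
    Set<Long> smallKeys = new HashSet<>();
    while (smallKeys.size() < count) {
      long candidate = random.nextLong();
      if (!bigKeys.contains(candidate)) {
        smallKeys.add(candidate);
      }
    }
    return smallKeys;
  }

  public static void main(String[] args) {
    Set<Long> bigKeys = new HashSet<>();
    bigKeys.add(42L);
    bigKeys.add(-7L);
    for (Long k : disjointSmallKeys(bigKeys, 5, 234882L)) {
      // Every FULL OUTER result row for these keys must be a non-match.
      assert !bigKeys.contains(k);
    }
  }
}
```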
0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestLong1( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER){ + doTestLong1( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.SHARED_SMALL_TABLE); + } + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + public boolean doTestLong1(long seed, int rowCount, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { + + HiveConf hiveConf = new HiveConf(); + + if (!addLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Big Table: int key, long value; Small Table: no key retained, string value + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.intTypeInfo, + TypeInfoFactory.longTypeInfo}; + + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {}; + + smallTableValueTypeInfos = + new TypeInfo[] {TypeInfoFactory.stringTypeInfo}; + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } + + testData = + new MapJoinTestData(rowCount, testDesc, seed, seed * 10); + + executeTest(testDesc, testData, "testLong1"); + + return false; + } + + @Test + public void testLong2() throws Exception { + long seed = 3553; + int rowCount = 10; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestLong2( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER){ + doTestLong2( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.SHARED_SMALL_TABLE); + } + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + public boolean doTestLong2(long seed, int rowCount, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { + + HiveConf hiveConf = new HiveConf(); + + if (!addLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Big Table: short key, no value; Small Table: key retained, timestamp value + bigTableTypeInfos = + new 
TypeInfo[] { + TypeInfoFactory.shortTypeInfo}; + + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {0}; + + smallTableValueTypeInfos = + new TypeInfo[] {TypeInfoFactory.timestampTypeInfo}; + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } + + testData = + new MapJoinTestData(rowCount, testDesc, seed, seed * 10); + + executeTest(testDesc, testData, "testLong2"); + + return false; + } + + + @Test + public void testLong3() throws Exception { + long seed = 9934; + int rowCount = 10; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestLong3( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER){ + doTestLong3( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.SHARED_SMALL_TABLE); + } + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + public boolean doTestLong3(long seed, int rowCount, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { + + HiveConf hiveConf = new HiveConf(); + + if (!addLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Big Table: int key, string value; Small Table: key retained, decimal value + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.intTypeInfo, + TypeInfoFactory.stringTypeInfo}; + + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {0}; + + smallTableValueTypeInfos = + new TypeInfo[] { + new DecimalTypeInfo(38, 18)}; + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } + + testData = + new MapJoinTestData(rowCount, testDesc, seed, seed * 10); + + executeTest(testDesc, testData, "testLong3"); + + return false; + } + + @Test + public void testLong3_NoRegularKeys() throws Exception { + long seed = 9934; + int rowCount = 10; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestLong3_NoRegularKeys( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER){ + doTestLong3_NoRegularKeys( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.SHARED_SMALL_TABLE); + } + } + seed++; + 
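doTestLong2 above is the first variation that retains the Small Table key (smallTableRetainKeyColumnNums = {0}). The expected output row is then laid out as retained Big Table columns, followed by retained Small Table key columns, followed by retained Small Table value columns; the createExpectedTestRowMultiSet() changes later in this patch advance an outputColumnNum cursor in exactly that order. A small sketch of the layout arithmetic (sample values are illustrative only):

```java
import java.util.Arrays;

public class OutputLayoutSketch {
  // Concatenate the three retained column groups in the order the
  // expected-result builder uses: Big Table columns, Small Table keys, values.
  static Object[] outputRow(Object[] bigRetained, Object[] smallKeys, Object[] smallValues) {
    Object[] out = new Object[bigRetained.length + smallKeys.length + smallValues.length];
    int outputColumnNum = 0;
    for (Object o : bigRetained) { out[outputColumnNum++] = o; }
    for (Object o : smallKeys)   { out[outputColumnNum++] = o; }
    for (Object o : smallValues) { out[outputColumnNum++] = o; }
    return out;
  }

  public static void main(String[] args) {
    // Shape of a doTestLong2-style row: short key, retained small key, timestamp value.
    Object[] row = outputRow(
        new Object[] { (short) 7 },
        new Object[] { (short) 7 },
        new Object[] { "2030-01-01 00:00:00" });
    System.out.println(Arrays.toString(row)); // [7, 7, 2030-01-01 00:00:00]
  }
}
```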
hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + public boolean doTestLong3_NoRegularKeys(long seed, int rowCount, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { + + HiveConf hiveConf = new HiveConf(); + + if (!addLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + smallTableGenerationParameters.setValueOption(ValueOption.NO_REGULAR_SMALL_KEYS); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Big Table: int key, string value; Small Table: key retained, decimal value + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.intTypeInfo, + TypeInfoFactory.stringTypeInfo}; + + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {0}; + + smallTableValueTypeInfos = + new TypeInfo[] { + new DecimalTypeInfo(38, 18)}; + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } + + testData = + new MapJoinTestData(rowCount, testDesc, seed, seed * 10); + + executeTest(testDesc, testData, "doTestLong3_NoRegularKeys"); + + return false; + } + + @Test + public void testLong4() throws Exception { + long seed = 3982; + int rowCount = 10; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestLong4( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER){ + doTestLong4( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.SHARED_SMALL_TABLE); + } + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + public boolean doTestLong4(long seed, int rowCount, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { + + HiveConf hiveConf = new HiveConf(); + + if (!addLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Big Table: int key, no value; Small Table: no key retained, no value + // (exercise INNER_BIGONLY, LEFT_SEMI) + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.intTypeInfo}; + + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {}; + + smallTableValueTypeInfos = new TypeInfo[] {}; + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + 
smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } + + testData = + new MapJoinTestData(rowCount, testDesc, seed, seed * 10); + + executeTest(testDesc, testData, "testLong4"); + + return false; + } + + @Test + public void testLong5() throws Exception { + long seed = 3553; + int rowCount = 10; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestLong5( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER){ + doTestLong5( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.SHARED_SMALL_TABLE); + } + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + public boolean doTestLong5(long seed, int rowCount, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { + + HiveConf hiveConf = new HiveConf(); + + if (!addLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Big Table: long key, no value; Small Table: key retained, no value + // (exercise INNER_BIGONLY, LEFT_SEMI) + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.longTypeInfo}; + + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {0}; + + smallTableValueTypeInfos = new TypeInfo[] {}; + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } + + testData = + new MapJoinTestData(rowCount, testDesc, seed, seed * 10); + + executeTest(testDesc, testData, "testLong5"); + + return false; + } + + @Test + public void testLong6() throws Exception { + long seed = 9384; + int rowCount = 10; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestLong6( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER){ + doTestLong6( + seed, rowCount, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.SHARED_SMALL_TABLE); + } + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + public boolean doTestLong6(long seed, int rowCount, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { + + HiveConf hiveConf = new HiveConf(); + + if (!addLongHiveConfVariation(hiveConfVariation, hiveConf)) { + 
return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Big Table: long key, timestamp value; Small Table: key retained, no value + // (exercise INNER_BIGONLY, LEFT_SEMI) + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.longTypeInfo, + TypeInfoFactory.timestampTypeInfo}; + + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {0}; + + smallTableValueTypeInfos = new TypeInfo[] {}; + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } + + testData = + new MapJoinTestData(rowCount, testDesc, seed, seed * 10); + + executeTest(testDesc, testData, "testLong6"); + + return false; + } + + private boolean addNonLongHiveConfVariation(int hiveConfVariation, HiveConf hiveConf) { + + // Set defaults. + HiveConf.setIntVar( + hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_OVERFLOW_REPEATED_THRESHOLD, -1); + + switch (hiveConfVariation) { + case 0: + break; + case 1: + // Force generateHashMapResultLargeMultiValue to be used. + HiveConf.setIntVar( + hiveConf, + HiveConf.ConfVars.HIVE_VECTORIZATION_MAPJOIN_NATIVE_OVERFLOW_REPEATED_THRESHOLD, 5); + break; + default: + return false; + } + return true; + } + + @Test + public void testMultiKey0() throws Exception { + long seed = 28322; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestMultiKey0( + seed, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER){ + doTestMultiKey0( + seed, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.SHARED_SMALL_TABLE); + } + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + public boolean doTestMultiKey0(long seed, int hiveConfVariation, VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { + + int rowCount = 10; + + HiveConf hiveConf = new HiveConf(); + + if (!addNonLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; - private final RowTestObjectsMultiSet testRowMultiSet; + // Two key columns. 
+ bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.shortTypeInfo, + TypeInfoFactory.intTypeInfo}; + bigTableKeyColumnNums = new int[] {0, 1}; - public TestMultiSetCollectorOperator( - ObjectInspector[] outputObjectInspectors, - RowTestObjectsMultiSet testRowMultiSet) { - super(outputObjectInspectors); - this.testRowMultiSet = testRowMultiSet; - } + smallTableRetainKeyColumnNums = new int[] {0, 1}; - public RowTestObjectsMultiSet getTestRowMultiSet() { - return testRowMultiSet; - } + smallTableValueTypeInfos = new TypeInfo[] {}; - public void nextTestRow(RowTestObjects testRow) { - testRowMultiSet.add(testRow); - } + //---------------------------------------------------------------------------------------------- - @Override - public String getName() { - return TestMultiSetCollectorOperator.class.getSimpleName(); + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; } + + // Prepare data. Good for ANY implementation variation. + testData = + new MapJoinTestData(rowCount, testDesc, seed, seed * 10); + + executeTest(testDesc, testData, "testMultiKey0"); + + return false; + } + + @Test + public void testMultiKey1() throws Exception { + long seed = 87543; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestMultiKey1( + seed, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER){ + doTestMultiKey1( + seed, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.SHARED_SMALL_TABLE); + } + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); } - private class TestMultiSetVectorCollectorOperator extends RowVectorCollectorTestOperator { + public boolean doTestMultiKey1(long seed, int hiveConfVariation, VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { - private final RowTestObjectsMultiSet testRowMultiSet; + int rowCount = 10; + + HiveConf hiveConf = new HiveConf(); - public RowTestObjectsMultiSet getTestRowMultiSet() { - return testRowMultiSet; + if (!addNonLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; } - public TestMultiSetVectorCollectorOperator(TypeInfo[] outputTypeInfos, - ObjectInspector[] outputObjectInspectors, RowTestObjectsMultiSet testRowMultiSet) - throws HiveException { - super(outputTypeInfos, outputObjectInspectors); - this.testRowMultiSet = testRowMultiSet; - } + TypeInfo[] bigTableTypeInfos = null; - public void nextTestRow(RowTestObjects testRow) { - testRowMultiSet.add(testRow); - } + int[] bigTableKeyColumnNums = null; - @Override - public String getName() { - return TestMultiSetVectorCollectorOperator.class.getSimpleName(); - } - } + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Three key columns. 
+ bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.timestampTypeInfo, + TypeInfoFactory.shortTypeInfo, + TypeInfoFactory.stringTypeInfo}; + bigTableKeyColumnNums = new int[] {0, 1, 2}; + + smallTableRetainKeyColumnNums = new int[] {0, 1, 2}; - private static class KeyConfig { - long seed; - PrimitiveTypeInfo primitiveTypeInfo; - KeyConfig(long seed, PrimitiveTypeInfo primitiveTypeInfo) { - this.seed = seed; - this.primitiveTypeInfo = primitiveTypeInfo; + smallTableValueTypeInfos = + new TypeInfo[] {new DecimalTypeInfo(38, 18)}; + + //---------------------------------------------------------------------------------------------- + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; } + + // Prepare data. Good for ANY implementation variation. + testData = + new MapJoinTestData(rowCount, testDesc, seed, seed * 10); + + executeTest(testDesc, testData, "testMultiKey1"); + + return false; } - private static KeyConfig[] longKeyConfigs = new KeyConfig[] { - new KeyConfig(234882L, TypeInfoFactory.longTypeInfo), - new KeyConfig(4600L, TypeInfoFactory.intTypeInfo), - new KeyConfig(98743L, TypeInfoFactory.shortTypeInfo)}; @Test - public void testLong() throws Exception { - for (KeyConfig longKeyConfig : longKeyConfigs) { + public void testMultiKey2() throws Exception { + long seed = 87543; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { - if (vectorMapJoinVariation == VectorMapJoinVariation.NONE){ - continue; + hiveConfVariationsDone = + doTestMultiKey2( + seed, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER){ + doTestMultiKey2( + seed, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.SHARED_SMALL_TABLE); } - doTestLong(longKeyConfig.seed, longKeyConfig.primitiveTypeInfo, vectorMapJoinVariation); } - } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); } - public void doTestLong(long seed, TypeInfo numberTypeInfo, - VectorMapJoinVariation vectorMapJoinVariation) throws Exception { + public boolean doTestMultiKey2(long seed, int hiveConfVariation, VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { - int rowCount = 10000; + int rowCount = 10; HiveConf hiveConf = new HiveConf(); - String[] bigTableColumnNames = new String[] {"number1"}; - TypeInfo[] bigTableTypeInfos = - new TypeInfo[] { - TypeInfoFactory.longTypeInfo}; - int[] bigTableKeyColumnNums = new int[] {0}; + if (!addNonLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } - String[] smallTableValueColumnNames = new String[] {"sv1", "sv2"}; - TypeInfo[] smallTableValueTypeInfos = - new TypeInfo[] {TypeInfoFactory.dateTypeInfo, TypeInfoFactory.stringTypeInfo}; + TypeInfo[] bigTableTypeInfos = null; - int[] bigTableRetainColumnNums = new int[] {0}; + int[] bigTableKeyColumnNums = null; - int[] smallTableRetainKeyColumnNums = new int[] {}; - int[] smallTableRetainValueColumnNums = new int[] {0, 1}; + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; SmallTableGenerationParameters smallTableGenerationParameters = 
new SmallTableGenerationParameters(); + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // Three key columns. + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.longTypeInfo, + TypeInfoFactory.shortTypeInfo, + TypeInfoFactory.stringTypeInfo}; + bigTableKeyColumnNums = new int[] {0, 1, 2}; + + smallTableRetainKeyColumnNums = new int[] {0, 1, 2}; + + smallTableValueTypeInfos = + new TypeInfo[] { + TypeInfoFactory.stringTypeInfo}; + //---------------------------------------------------------------------------------------------- - MapJoinTestDescription testDesc = new MapJoinTestDescription( - hiveConf, vectorMapJoinVariation, - bigTableColumnNames, bigTableTypeInfos, - bigTableKeyColumnNums, - smallTableValueColumnNames, smallTableValueTypeInfos, - bigTableRetainColumnNums, - smallTableRetainKeyColumnNums, smallTableRetainValueColumnNums, - smallTableGenerationParameters); + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } // Prepare data. Good for ANY implementation variation. - MapJoinTestData testData = + testData = new MapJoinTestData(rowCount, testDesc, seed, seed * 10); - executeTest(testDesc, testData); + executeTest(testDesc, testData, "testMultiKey2"); + + return false; } @Test - public void testMultiKey() throws Exception { + public void testString0() throws Exception { long seed = 87543; - for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { - if (vectorMapJoinVariation == VectorMapJoinVariation.NONE){ - continue; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestString0( + seed, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER){ + doTestString0( + seed, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.SHARED_SMALL_TABLE); + } } - doTestMultiKey(seed, vectorMapJoinVariation); - } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); } - public void doTestMultiKey(long seed, VectorMapJoinVariation vectorMapJoinVariation) throws Exception { + public boolean doTestString0(long seed, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { - int rowCount = 10000; + int rowCount = 10; HiveConf hiveConf = new HiveConf(); - String[] bigTableColumnNames = new String[] {"b1", "b2", "b3"}; - TypeInfo[] bigTableTypeInfos = + if (!addNonLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; + + SmallTableGenerationParameters smallTableGenerationParameters = + new SmallTableGenerationParameters(); + + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // One plain STRING key column. 
+ bigTableTypeInfos = new TypeInfo[] { - TypeInfoFactory.intTypeInfo, - TypeInfoFactory.longTypeInfo, TypeInfoFactory.stringTypeInfo}; - int[] bigTableKeyColumnNums = new int[] {0, 1, 2}; + bigTableKeyColumnNums = new int[] {0}; - String[] smallTableValueColumnNames = new String[] {"sv1"}; - TypeInfo[] smallTableValueTypeInfos = - new TypeInfo[] {TypeInfoFactory.stringTypeInfo}; + smallTableRetainKeyColumnNums = new int[] {0}; + + smallTableValueTypeInfos = + new TypeInfo[] {TypeInfoFactory.dateTypeInfo, TypeInfoFactory.timestampTypeInfo}; + + //---------------------------------------------------------------------------------------------- + + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } + + // Prepare data. Good for ANY implementation variation. + testData = + new MapJoinTestData(rowCount, testDesc, seed, seed * 10); + + executeTest(testDesc, testData, "testString0"); + + return false; + } + + @Test + public void testString1() throws Exception { + long seed = 3422; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestString1( + seed, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER){ + doTestString1( + seed, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.SHARED_SMALL_TABLE); + } + } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); + } + + public boolean doTestString1(long seed, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { - int[] bigTableRetainColumnNums = new int[] {0, 1, 2}; + int rowCount = 10; - int[] smallTableRetainKeyColumnNums = new int[] {}; - int[] smallTableRetainValueColumnNums = new int[] {0}; + HiveConf hiveConf = new HiveConf(); + + if (!addNonLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } + + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; + + TypeInfo[] smallTableValueTypeInfos = null; + + int[] smallTableRetainKeyColumnNums = null; SmallTableGenerationParameters smallTableGenerationParameters = new SmallTableGenerationParameters(); + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // One BINARY key column. 
+ bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.binaryTypeInfo}; + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {0}; + + smallTableValueTypeInfos = + new TypeInfo[] { + TypeInfoFactory.shortTypeInfo, + TypeInfoFactory.floatTypeInfo, + new DecimalTypeInfo(38, 18)}; + + smallTableGenerationParameters = + new SmallTableGenerationParameters(); + //---------------------------------------------------------------------------------------------- - MapJoinTestDescription testDesc = new MapJoinTestDescription( - hiveConf, vectorMapJoinVariation, - bigTableColumnNames, bigTableTypeInfos, - bigTableKeyColumnNums, - smallTableValueColumnNames, smallTableValueTypeInfos, - bigTableRetainColumnNums, - smallTableRetainKeyColumnNums, smallTableRetainValueColumnNums, - smallTableGenerationParameters); + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } // Prepare data. Good for ANY implementation variation. - MapJoinTestData testData = + testData = new MapJoinTestData(rowCount, testDesc, seed, seed * 10); - executeTest(testDesc, testData); + executeTest(testDesc, testData, "testString1"); + + return false; } @Test - public void testString() throws Exception { - long seed = 87543; - for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { - if (vectorMapJoinVariation == VectorMapJoinVariation.NONE){ - continue; + public void testString2() throws Exception { + long seed = 7439; + + int hiveConfVariation = 0; + boolean hiveConfVariationsDone = false; + do { + for (VectorMapJoinVariation vectorMapJoinVariation : VectorMapJoinVariation.values()) { + hiveConfVariationsDone = + doTestString2( + seed, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.DYNAMIC_PARTITION_HASH_JOIN); + if (vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER){ + doTestString2( + seed, hiveConfVariation, vectorMapJoinVariation, + MapJoinPlanVariation.SHARED_SMALL_TABLE); + } } - doTestString(seed, vectorMapJoinVariation); - } + seed++; + hiveConfVariation++; + } while (!hiveConfVariationsDone); } - public void doTestString(long seed, VectorMapJoinVariation vectorMapJoinVariation) throws Exception { + public boolean doTestString2(long seed, int hiveConfVariation, + VectorMapJoinVariation vectorMapJoinVariation, + MapJoinPlanVariation mapJoinPlanVariation) throws Exception { - int rowCount = 10000; + int rowCount = 10; HiveConf hiveConf = new HiveConf(); - String[] bigTableColumnNames = new String[] {"b1"}; - TypeInfo[] bigTableTypeInfos = - new TypeInfo[] { - TypeInfoFactory.stringTypeInfo}; - int[] bigTableKeyColumnNums = new int[] {0}; + if (!addNonLongHiveConfVariation(hiveConfVariation, hiveConf)) { + return true; + } - String[] smallTableValueColumnNames = new String[] {"sv1", "sv2"}; - TypeInfo[] smallTableValueTypeInfos = - new TypeInfo[] {TypeInfoFactory.dateTypeInfo, TypeInfoFactory.timestampTypeInfo}; + TypeInfo[] bigTableTypeInfos = null; + + int[] bigTableKeyColumnNums = null; - int[] bigTableRetainColumnNums = new int[] {0}; + TypeInfo[] smallTableValueTypeInfos = null; - int[] smallTableRetainKeyColumnNums = new int[] {}; - int[] smallTableRetainValueColumnNums = new int[] {0, 1}; + int[] smallTableRetainKeyColumnNums = null; SmallTableGenerationParameters 
smallTableGenerationParameters = new SmallTableGenerationParameters(); + MapJoinTestDescription testDesc = null; + MapJoinTestData testData = null; + + // One STRING key column; Small Table value: NONE (tests INNER_BIG_ONLY, LEFT_SEMI). + bigTableTypeInfos = + new TypeInfo[] { + TypeInfoFactory.stringTypeInfo}; + bigTableKeyColumnNums = new int[] {0}; + + smallTableRetainKeyColumnNums = new int[] {0}; + + smallTableValueTypeInfos = new TypeInfo[] {}; + + smallTableGenerationParameters = + new SmallTableGenerationParameters(); + //---------------------------------------------------------------------------------------------- - MapJoinTestDescription testDesc = new MapJoinTestDescription( - hiveConf, vectorMapJoinVariation, - bigTableColumnNames, bigTableTypeInfos, - bigTableKeyColumnNums, - smallTableValueColumnNames, smallTableValueTypeInfos, - bigTableRetainColumnNums, - smallTableRetainKeyColumnNums, smallTableRetainValueColumnNums, - smallTableGenerationParameters); + testDesc = + new MapJoinTestDescription( + hiveConf, vectorMapJoinVariation, + bigTableTypeInfos, + bigTableKeyColumnNums, + smallTableValueTypeInfos, + smallTableRetainKeyColumnNums, + smallTableGenerationParameters, + mapJoinPlanVariation); + + if (!goodTestVariation(testDesc)) { + return false; + } // Prepare data. Good for ANY implementation variation. - MapJoinTestData testData = + testData = new MapJoinTestData(rowCount, testDesc, seed, seed * 10); - executeTest(testDesc, testData); + executeTest(testDesc, testData, "testString2"); + + return false; } private void addBigTableRetained(MapJoinTestDescription testDesc, Object[] bigTableRowObjects, @@ -357,14 +1486,32 @@ private void addBigTableRetained(MapJoinTestDescription testDesc, Object[] bigTa } } - private void addToOutput(MapJoinTestDescription testDesc, RowTestObjectsMultiSet expectedTestRowMultiSet, - Object[] outputObjects) { + private void addToOutput(MapJoinTestDescription testDesc, + RowTestObjectsMultiSet expectedTestRowMultiSet, Object[] outputObjects, + RowTestObjectsMultiSet.RowFlag rowFlag) { for (int c = 0; c < outputObjects.length; c++) { - PrimitiveObjectInspector primitiveObjInsp = ((PrimitiveObjectInspector) testDesc.outputObjectInspectors[c]); + PrimitiveObjectInspector primitiveObjInsp = + ((PrimitiveObjectInspector) testDesc.outputObjectInspectors[c]); Object outputObject = outputObjects[c]; outputObjects[c] = primitiveObjInsp.copyObject(outputObject); } - expectedTestRowMultiSet.add(new RowTestObjects(outputObjects)); + expectedTestRowMultiSet.add(new RowTestObjects(outputObjects), rowFlag); + } + + private String rowToCsvString(Object[] rowObjects) { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < rowObjects.length; i++) { + if (sb.length() > 0) { + sb.append(","); + } + Object obj = rowObjects[i]; + if (obj == null) { + sb.append("\\N"); + } else { + sb.append(obj); + } + } + return sb.toString(); } /* @@ -377,7 +1524,7 @@ private RowTestObjectsMultiSet createExpectedTestRowMultiSet(MapJoinTestDescript RowTestObjectsMultiSet expectedTestRowMultiSet = new RowTestObjectsMultiSet(); VectorExtractRow vectorExtractRow = new VectorExtractRow(); - vectorExtractRow.init(testDesc.bigTableKeyTypeInfos); + vectorExtractRow.init(testDesc.bigTableTypeInfos); final int bigTableColumnCount = testDesc.bigTableTypeInfos.length; Object[] bigTableRowObjects = new Object[bigTableColumnCount]; @@ -397,20 +1544,26 @@ private RowTestObjectsMultiSet createExpectedTestRowMultiSet(MapJoinTestDescript vectorExtractRow.extractRow(testData.bigTableBatch, 
r, bigTableRowObjects); // Form key object array + boolean hasAnyNulls = false; // NULLs may be present in {FULL|LEFT|RIGHT} OUTER joins. for (int k = 0; k < bigTableKeyColumnCount; k++) { int keyColumnNum = testDesc.bigTableKeyColumnNums[k]; - bigTableKeyObjects[k] = bigTableRowObjects[keyColumnNum]; + Object keyObject = bigTableRowObjects[keyColumnNum]; + if (keyObject == null) { + hasAnyNulls = true; + } + bigTableKeyObjects[k] = keyObject; bigTableKeyObjects[k] = ((PrimitiveObjectInspector) testDesc.bigTableObjectInspectors[keyColumnNum]).copyObject(bigTableKeyObjects[k]); } RowTestObjects testKey = new RowTestObjects(bigTableKeyObjects); - if (testData.smallTableKeyHashMap.containsKey(testKey)) { + if (testData.smallTableKeyHashMap.containsKey(testKey) && !hasAnyNulls) { int smallTableKeyIndex = testData.smallTableKeyHashMap.get(testKey); switch (testDesc.vectorMapJoinVariation) { case INNER: case OUTER: + case FULL_OUTER: { // One row per value. ArrayList valueList = testData.smallTableValues.get(smallTableKeyIndex); @@ -420,36 +1573,46 @@ private RowTestObjectsMultiSet createExpectedTestRowMultiSet(MapJoinTestDescript addBigTableRetained(testDesc, bigTableRowObjects, outputObjects); + int outputColumnNum = testDesc.bigTableRetainColumnNums.length; + + final int smallTableRetainKeyColumnNumsLength = + testDesc.smallTableRetainKeyColumnNums.length; + for (int o = 0; o < smallTableRetainKeyColumnNumsLength; o++) { + outputObjects[outputColumnNum++] = + bigTableKeyObjects[testDesc.smallTableRetainKeyColumnNums[o]]; + } + Object[] valueRow = valueList.get(v).getRow(); - final int bigTableRetainColumnNumsLength = testDesc.bigTableRetainColumnNums.length; - final int smallTableRetainValueColumnNumsLength = testDesc.smallTableRetainValueColumnNums.length; + final int smallTableRetainValueColumnNumsLength = + testDesc.smallTableRetainValueColumnNums.length; for (int o = 0; o < smallTableRetainValueColumnNumsLength; o++) { - outputObjects[bigTableRetainColumnNumsLength + o] = valueRow[testDesc.smallTableRetainValueColumnNums[o]]; + outputObjects[outputColumnNum++] = + valueRow[testDesc.smallTableRetainValueColumnNums[o]]; } - addToOutput(testDesc, expectedTestRowMultiSet, outputObjects); + addToOutput(testDesc, expectedTestRowMultiSet, outputObjects, + RowTestObjectsMultiSet.RowFlag.REGULAR); } } break; case INNER_BIG_ONLY: - { - // Value count rows. - final int valueCount = testData.smallTableValueCounts.get(smallTableKeyIndex); - for (int v = 0; v < valueCount; v++) { - Object[] outputObjects = new Object[testDesc.outputColumnNames.length]; - - addBigTableRetained(testDesc, bigTableRowObjects, outputObjects); - addToOutput(testDesc, expectedTestRowMultiSet, outputObjects); - } - } - break; case LEFT_SEMI: { - // One row (existence). 
Object[] outputObjects = new Object[testDesc.outputColumnNames.length]; addBigTableRetained(testDesc, bigTableRowObjects, outputObjects); - addToOutput(testDesc, expectedTestRowMultiSet, outputObjects); + + int outputColumnNum = testDesc.bigTableRetainColumnNums.length; + + final int smallTableRetainKeyColumnNumsLength = + testDesc.smallTableRetainKeyColumnNums.length; + for (int o = 0; o < smallTableRetainKeyColumnNumsLength; o++) { + outputObjects[outputColumnNum++] = + bigTableKeyObjects[testDesc.smallTableRetainKeyColumnNums[o]]; + } + + addToOutput(testDesc, expectedTestRowMultiSet, outputObjects, + RowTestObjectsMultiSet.RowFlag.REGULAR); } break; default: @@ -458,9 +1621,10 @@ private RowTestObjectsMultiSet createExpectedTestRowMultiSet(MapJoinTestDescript } else { - // No match. + // Big Table non-match. - if (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.OUTER) { + if (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.OUTER || + testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER) { // We need to add a non-match row with nulls for small table values. @@ -468,14 +1632,74 @@ private RowTestObjectsMultiSet createExpectedTestRowMultiSet(MapJoinTestDescript addBigTableRetained(testDesc, bigTableRowObjects, outputObjects); - final int bigTableRetainColumnNumsLength = testDesc.bigTableRetainColumnNums.length; - final int smallTableRetainValueColumnNumsLength = testDesc.smallTableRetainValueColumnNums.length; + int outputColumnNum = testDesc.bigTableRetainColumnNums.length; + + final int smallTableRetainKeyColumnNumsLength = + testDesc.smallTableRetainKeyColumnNums.length; + for (int o = 0; o < smallTableRetainKeyColumnNumsLength; o++) { + outputObjects[outputColumnNum++] = null; + } + + final int smallTableRetainValueColumnNumsLength = + testDesc.smallTableRetainValueColumnNums.length; for (int o = 0; o < smallTableRetainValueColumnNumsLength; o++) { - outputObjects[bigTableRetainColumnNumsLength + o] = null; + outputObjects[outputColumnNum++] = null; } - addToOutput(testDesc, expectedTestRowMultiSet, outputObjects); + addToOutput(testDesc, expectedTestRowMultiSet, outputObjects, + RowTestObjectsMultiSet.RowFlag.LEFT_OUTER); + } + } + } + } + + if (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER) { + + System.out.println("*BENCHMARK* ----------------------------------------------------------------------"); + System.out.println("*BENCHMARK* FULL OUTER non-match key count " + + testData.fullOuterAdditionalSmallTableKeys.size()); + + // Fill in non-match Small Table key results. + for (RowTestObjects smallTableKey : testData.fullOuterAdditionalSmallTableKeys) { + + System.out.println( + "*BENCHMARK* fullOuterAdditionalSmallTableKey " + smallTableKey.toString()); + + int smallTableKeyIndex = testData.smallTableKeyHashMap.get(smallTableKey); + + // One row per value. + ArrayList valueList = testData.smallTableValues.get(smallTableKeyIndex); + final int valueCount = valueList.size(); + for (int v = 0; v < valueCount; v++) { + Object[] outputObjects = new Object[testDesc.outputColumnNames.length]; + + // Non-match Small Table keys produce NULL Big Table columns. + final int bigTableRetainColumnNumsLength = testDesc.bigTableRetainColumnNums.length; + for (int o = 0; o < bigTableRetainColumnNumsLength; o++) { + outputObjects[o] = null; + } + + int outputColumnNum = testDesc.bigTableRetainColumnNums.length; + + // The output result may include 0, 1, or more small key columns... 
+ Object[] smallKeyObjects = smallTableKey.getRow(); + final int smallTableRetainKeyColumnNumsLength = + testDesc.smallTableRetainKeyColumnNums.length; + for (int o = 0; o < smallTableRetainKeyColumnNumsLength; o++) { + outputObjects[outputColumnNum++] = + smallKeyObjects[testDesc.smallTableRetainKeyColumnNums[o]]; } + + Object[] valueRow = valueList.get(v).getRow(); + final int smallTableRetainValueColumnNumsLength = + testDesc.smallTableRetainValueColumnNums.length; + for (int o = 0; o < smallTableRetainValueColumnNumsLength; o++) { + outputObjects[outputColumnNum++] = + valueRow[testDesc.smallTableRetainValueColumnNums[o]]; + } + + addToOutput(testDesc, expectedTestRowMultiSet, outputObjects, + RowTestObjectsMultiSet.RowFlag.FULL_OUTER); } } } @@ -483,67 +1707,336 @@ private RowTestObjectsMultiSet createExpectedTestRowMultiSet(MapJoinTestDescript return expectedTestRowMultiSet; } - private void executeTest(MapJoinTestDescription testDesc, MapJoinTestData testData) throws Exception { + private void generateBigAndSmallTableRowLogLines(MapJoinTestDescription testDesc, + MapJoinTestData testData) throws HiveException { + + // Generate Big Table rows log lines... + VectorExtractRow vectorExtractRow = new VectorExtractRow(); + vectorExtractRow.init(testDesc.bigTableTypeInfos); + + final int bigTableColumnCount = testDesc.bigTableTypeInfos.length; + Object[] bigTableRowObjects = new Object[bigTableColumnCount]; + + VectorBatchGenerateStream bigTableBatchStream = testData.getBigTableBatchStream(); + VectorizedRowBatch batch = testData.getBigTableBatch(); + bigTableBatchStream.reset(); + while (bigTableBatchStream.isNext()) { + batch.reset(); + bigTableBatchStream.fillNext(batch); + + final int size = testData.bigTableBatch.size; + for (int r = 0; r < size; r++) { + vectorExtractRow.extractRow(testData.bigTableBatch, r, bigTableRowObjects); + + System.out.println("*BIG TABLE* " + rowToCsvString(bigTableRowObjects)); + } + } + + // Generate Small Table rows log lines... + final int keyKeyColumnNumsLength = + testDesc.bigTableKeyColumnNums.length; + final int smallTableRetainValueLength = + testDesc.smallTableRetainValueColumnNums.length; + final int smallTableLength = keyKeyColumnNumsLength + smallTableRetainValueLength; + for (Entry entry : testData.smallTableKeyHashMap.entrySet()) { + if (smallTableRetainValueLength == 0) { + Object[] smallTableRowObjects = entry.getKey().getRow(); + System.out.println("*SMALL TABLE* " + rowToCsvString(smallTableRowObjects)); + } else { + Integer valueIndex = entry.getValue(); + ArrayList valueList = testData.smallTableValues.get(valueIndex); + final int valueCount = valueList.size(); + for (int v = 0; v < valueCount; v++) { + Object[] smallTableRowObjects = new Object[smallTableLength]; + System.arraycopy(entry.getKey().getRow(), 0, smallTableRowObjects, 0, keyKeyColumnNumsLength); + int outputColumnNum = keyKeyColumnNumsLength; + Object[] valueRow = valueList.get(v).getRow(); + for (int o = 0; o < smallTableRetainValueLength; o++) { + smallTableRowObjects[outputColumnNum++] = + valueRow[testDesc.smallTableRetainValueColumnNums[o]]; + } + System.out.println("*SMALL TABLE* " + rowToCsvString(smallTableRowObjects)); + } + } + } + } + + private void executeTest(MapJoinTestDescription testDesc, MapJoinTestData testData, + String title) throws Exception { + + // So stack trace is self-explanatory. 
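Summing up the expected-result construction above: a matched Big Table key contributes one REGULAR row per Small Table value; under OUTER and FULL_OUTER, a Big Table row with no match, including any row whose key contains a NULL (SQL NULL never equals NULL), contributes a LEFT_OUTER row with NULL Small Table columns; and under FULL_OUTER, every never-matched Small Table key contributes a FULL_OUTER row with NULL Big Table columns. The \N rendering mirrors rowToCsvString(). A miniature, pure-Java analogue of that oracle (not the test's actual RowTestObjectsMultiSet):

```java
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;

public class FullOuterOracleSketch {
  static String csv(Object... cols) {
    StringBuilder sb = new StringBuilder();
    for (Object c : cols) {
      if (sb.length() > 0) sb.append(',');
      sb.append(c == null ? "\\N" : c); // NULL renders as \N, like the log lines
    }
    return sb.toString();
  }

  public static void main(String[] args) {
    List<Long> bigKeys = Arrays.asList(42L, null, 7L);   // a NULL key never matches
    Map<Long, List<String>> small = new HashMap<>();
    small.put(42L, Arrays.asList("a", "b"));             // matched key, two values
    small.put(99L, Arrays.asList("c"));                  // never-matched key

    List<String> expected = new ArrayList<>();
    HashSet<Long> matched = new HashSet<>();
    for (Long k : bigKeys) {
      List<String> values = (k == null) ? null : small.get(k);
      if (values != null) {
        matched.add(k);
        for (String v : values) expected.add(csv(k, v)); // REGULAR rows
      } else {
        expected.add(csv(k, null));                      // LEFT_OUTER row
      }
    }
    for (Map.Entry<Long, List<String>> e : small.entrySet()) { // FULL_OUTER rows
      if (!matched.contains(e.getKey())) {
        for (String v : e.getValue()) expected.add(csv((Object) null, v));
      }
    }
    expected.forEach(System.out::println);
    // 42,a / 42,b / \N,\N / 7,\N / 99,c
  }
}
```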
+ switch (testDesc.vectorMapJoinVariation) { + case INNER: + executeTestInner(testDesc, testData, title); + break; + case INNER_BIG_ONLY: + executeTestInnerBigOnly(testDesc, testData, title); + break; + case LEFT_SEMI: + executeTestLeftSemi(testDesc, testData, title); + break; + case OUTER: + executeTestOuter(testDesc, testData, title); + break; + case FULL_OUTER: + executeTestFullOuter(testDesc, testData, title); + break; + default: + throw new RuntimeException("Unexpected Vector MapJoin variation " + + testDesc.vectorMapJoinVariation); + } + } + + private void executeTestInner(MapJoinTestDescription testDesc, MapJoinTestData testData, + String title) throws Exception { + doExecuteTest(testDesc, testData, title); + } + + private void executeTestInnerBigOnly(MapJoinTestDescription testDesc, MapJoinTestData testData, + String title) throws Exception { + doExecuteTest(testDesc, testData, title); + } + + private void executeTestLeftSemi(MapJoinTestDescription testDesc, MapJoinTestData testData, + String title) throws Exception { + doExecuteTest(testDesc, testData, title); + } + + private void executeTestOuter(MapJoinTestDescription testDesc, MapJoinTestData testData, + String title) throws Exception { + doExecuteTest(testDesc, testData, title); + } + + private void executeTestFullOuter(MapJoinTestDescription testDesc, MapJoinTestData testData, + String title) throws Exception { + doExecuteTest(testDesc, testData, title); + } + + private void doExecuteTest(MapJoinTestDescription testDesc, MapJoinTestData testData, + String title) throws Exception { RowTestObjectsMultiSet expectedTestRowMultiSet = createExpectedTestRowMultiSet(testDesc, testData); - // UNDONE: Inner count - System.out.println("*BENCHMARK* expectedTestRowMultiSet rowCount " + expectedTestRowMultiSet.getRowCount() + - " totalCount " + expectedTestRowMultiSet.getTotalCount()); + generateBigAndSmallTableRowLogLines(testDesc, testData); + + System.out.println("*BENCHMARK* expectedTestRowMultiSet " + + " totalKeyCount " + expectedTestRowMultiSet.getTotalKeyCount() + + " totalValueCount " + expectedTestRowMultiSet.getTotalValueCount()); // Execute all implementation variations. for (MapJoinTestImplementation mapJoinImplementation : MapJoinTestImplementation.values()) { - executeTestImplementation(mapJoinImplementation, testDesc, testData, - expectedTestRowMultiSet); + + if (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER && + mapJoinImplementation == MapJoinTestImplementation.ROW_MODE_HASH_MAP) { + + // Key match tracking not supported in plain Java HashMap. 
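The implementation loop above skips ROW_MODE_HASH_MAP for FULL OUTER because a plain java.util.HashMap-backed container has no way to report which Small Table keys were never matched during the Big Table drive; the native vector hash tables use a MatchTracker for that. A minimal sketch of the tracking idea, as a hypothetical wrapper rather than Hive's MatchTracker API:

```java
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

public class MatchTrackingMapSketch {
  static final class TrackingMap<K, V> {
    private final Map<K, V> map = new HashMap<>();
    private final Set<K> matched = new HashSet<>();

    void put(K key, V value) { map.put(key, value); }

    // Probe during the big-table drive; remember every key that matched.
    V probe(K key) {
      V value = map.get(key);
      if (value != null) {
        matched.add(key);
      }
      return value;
    }

    // After the drive, a FULL OUTER pass emits result rows for these keys.
    Set<K> nonMatchedKeys() {
      Set<K> result = new HashSet<>(map.keySet());
      result.removeAll(matched);
      return result;
    }
  }

  public static void main(String[] args) {
    TrackingMap<Long, String> table = new TrackingMap<>();
    table.put(42L, "a");
    table.put(99L, "c");
    table.probe(42L);
    System.out.println(table.nonMatchedKeys()); // [99]
  }
}
```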
+ continue; + } + switch (mapJoinImplementation) { + case ROW_MODE_HASH_MAP: + executeRowModeHashMap( + testDesc, testData, + expectedTestRowMultiSet, + title); + break; + case ROW_MODE_OPTIMIZED: + executeRowModeOptimized( + testDesc, testData, + expectedTestRowMultiSet, + title); + break; + case VECTOR_PASS_THROUGH: + executeVectorPassThrough( + testDesc, testData, + expectedTestRowMultiSet, + title); + break; + case NATIVE_VECTOR_OPTIMIZED: + executeNativeVectorOptimized( + testDesc, testData, + expectedTestRowMultiSet, + title); + break; + case NATIVE_VECTOR_FAST: + executeNativeVectorFast( + testDesc, testData, + expectedTestRowMultiSet, + title); + break; + default: + throw new RuntimeException( + "Unexpected vector map join test variation"); + } } } - private boolean isVectorOutput(MapJoinTestImplementation mapJoinImplementation) { - return - (mapJoinImplementation != MapJoinTestImplementation.ROW_MODE_HASH_MAP && - mapJoinImplementation != MapJoinTestImplementation.ROW_MODE_OPTIMIZED); + private void executeRowModeHashMap( + MapJoinTestDescription testDesc, MapJoinTestData testData, + RowTestObjectsMultiSet expectedTestRowMultiSet, + String title) + throws Exception { + executeTestImplementation( + MapJoinTestImplementation.ROW_MODE_HASH_MAP, + testDesc, testData, + expectedTestRowMultiSet, + title); + } + + private void executeRowModeOptimized( + MapJoinTestDescription testDesc, MapJoinTestData testData, + RowTestObjectsMultiSet expectedTestRowMultiSet, + String title) + throws Exception { + executeTestImplementation( + MapJoinTestImplementation.ROW_MODE_OPTIMIZED, + testDesc, testData, + expectedTestRowMultiSet, + title); + } + + private void executeVectorPassThrough( + MapJoinTestDescription testDesc, MapJoinTestData testData, + RowTestObjectsMultiSet expectedTestRowMultiSet, + String title) + throws Exception { + executeTestImplementation( + MapJoinTestImplementation.VECTOR_PASS_THROUGH, + testDesc, testData, + expectedTestRowMultiSet, + title); + } + + private void executeNativeVectorOptimized( + MapJoinTestDescription testDesc, MapJoinTestData testData, + RowTestObjectsMultiSet expectedTestRowMultiSet, + String title) + throws Exception { + executeTestImplementation( + MapJoinTestImplementation.NATIVE_VECTOR_OPTIMIZED, + testDesc, testData, + expectedTestRowMultiSet, + title); + } + + private void executeNativeVectorFast( + MapJoinTestDescription testDesc, MapJoinTestData testData, + RowTestObjectsMultiSet expectedTestRowMultiSet, + String title) + throws Exception { + executeTestImplementation( + MapJoinTestImplementation.NATIVE_VECTOR_FAST, + testDesc, testData, + expectedTestRowMultiSet, + title); } private void executeTestImplementation( MapJoinTestImplementation mapJoinImplementation, - MapJoinTestDescription testDesc, MapJoinTestData testData, RowTestObjectsMultiSet expectedTestRowMultiSet) + MapJoinTestDescription testDesc, MapJoinTestData testData, + RowTestObjectsMultiSet expectedTestRowMultiSet, + String title) throws Exception { - System.out.println("*BENCHMARK* Starting " + mapJoinImplementation + " test"); + System.out.println("*BENCHMARK* Starting implementation " + mapJoinImplementation + + " variation " + testDesc.vectorMapJoinVariation + + " title " + title); // UNDONE: Parameterize for implementation variation? 
MapJoinDesc mapJoinDesc = MapJoinTestConfig.createMapJoinDesc(testDesc); - final boolean isVectorOutput = isVectorOutput(mapJoinImplementation); + final boolean isVectorOutput = MapJoinTestConfig.isVectorOutput(mapJoinImplementation); RowTestObjectsMultiSet outputTestRowMultiSet = new RowTestObjectsMultiSet(); - Operator testCollectorOperator = - (!isVectorOutput ? - new TestMultiSetCollectorOperator( - testDesc.outputObjectInspectors, outputTestRowMultiSet) : - new TestMultiSetVectorCollectorOperator( - testDesc.outputTypeInfos, testDesc.outputObjectInspectors, outputTestRowMultiSet)); - - MapJoinOperator operator = + CreateMapJoinResult result = MapJoinTestConfig.createMapJoinImplementation( - mapJoinImplementation, testDesc, testCollectorOperator, testData, mapJoinDesc); + mapJoinImplementation, testDesc, testData, mapJoinDesc); + MapJoinOperator mapJoinOperator = result.mapJoinOperator; + MapJoinTableContainer mapJoinTableContainer = result.mapJoinTableContainer; + MapJoinTableContainerSerDe mapJoinTableContainerSerDe = result.mapJoinTableContainerSerDe; + + CountCollectorTestOperator testCollectorOperator; + if (!isVectorOutput) { + testCollectorOperator = + new TestMultiSetCollectorOperator( + testDesc.outputObjectInspectors, outputTestRowMultiSet); + } else { + VectorizationContext vOutContext = + ((VectorizationContextRegion) mapJoinOperator).getOutputVectorizationContext(); + testCollectorOperator = + new TestMultiSetVectorCollectorOperator( + ArrayUtils.toPrimitive(vOutContext.getProjectedColumns().toArray(new Integer[0])), + testDesc.outputTypeInfos, testDesc.outputObjectInspectors, outputTestRowMultiSet); + } + + MapJoinTestConfig.connectOperators(mapJoinOperator, testCollectorOperator); + + CountCollectorTestOperator interceptTestCollectorOperator = null; + if (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER && + !mapJoinDesc.isDynamicPartitionHashJoin()) { + + if (mapJoinImplementation == MapJoinTestImplementation.ROW_MODE_HASH_MAP) { + + // Not supported. + return; + } + + // Wire in FULL OUTER Intercept. + interceptTestCollectorOperator = + MapJoinTestConfig.addFullOuterIntercept( + mapJoinImplementation, testDesc, outputTestRowMultiSet, testData, + mapJoinOperator, mapJoinTableContainer, mapJoinTableContainerSerDe); + } else { + + // Invoke initializeOp methods. + mapJoinOperator.initialize( + testDesc.hiveConf, testDesc.inputObjectInspectors); + + // Fixup the mapJoinTables. 
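One small detail in the vector-output path above: VectorizationContext.getProjectedColumns() yields boxed Integers, and the vector collector wants primitive indices, hence the ArrayUtils.toPrimitive(...toArray(new Integer[0])) conversion. The same unboxing without the commons-lang dependency, as a plain-Java sketch:

```java
import java.util.Arrays;
import java.util.List;

public class ProjectedColumnsSketch {
  // Unbox a projected-column list into the int[] a collector operator wants;
  // equivalent in effect to ArrayUtils.toPrimitive(list.toArray(new Integer[0])).
  static int[] toPrimitive(List<Integer> projected) {
    int[] out = new int[projected.size()];
    for (int i = 0; i < projected.size(); i++) {
      out[i] = projected.get(i);
    }
    return out;
  }

  public static void main(String[] args) {
    System.out.println(Arrays.toString(toPrimitive(Arrays.asList(0, 1, 4)))); // [0, 1, 4]
  }
}
```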
+      mapJoinOperator.setTestMapJoinTableContainer(
+          1, mapJoinTableContainer, mapJoinTableContainerSerDe);
+    }
 
     if (!isVectorOutput) {
-      MapJoinTestData.driveBigTableData(testDesc, testData, operator);
+      MapJoinTestData.driveBigTableData(testDesc, testData, mapJoinOperator);
     } else {
-      MapJoinTestData.driveVectorBigTableData(testDesc, testData, operator);
+      MapJoinTestData.driveVectorBigTableData(testDesc, testData, mapJoinOperator);
+    }
+
+    if (!testCollectorOperator.getIsClosed()) {
+      Assert.fail("collector operator not closed");
+    }
+    if (testCollectorOperator.getIsAborted()) {
+      Assert.fail("collector operator aborted");
+    }
+    if (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER &&
+        !mapJoinDesc.isDynamicPartitionHashJoin()) {
+      if (!interceptTestCollectorOperator.getIsClosed()) {
+        Assert.fail("intercept collector operator not closed");
+      }
+      if (interceptTestCollectorOperator.getIsAborted()) {
+        Assert.fail("intercept collector operator aborted");
+      }
     }
 
     System.out.println("*BENCHMARK* executeTestImplementation row count " +
-        ((CountCollectorTestOperator) testCollectorOperator).getRowCount());
+        testCollectorOperator.getRowCount());
 
     // Verify the output!
-    if (!expectedTestRowMultiSet.verify(outputTestRowMultiSet)) {
-      System.out.println("*BENCHMARK* verify failed for " + mapJoinImplementation);
+    String option = "";
+    if (testDesc.vectorMapJoinVariation == VectorMapJoinVariation.FULL_OUTER) {
+      option = " mapJoinPlanVariation " + testDesc.mapJoinPlanVariation.name();
+    }
+    if (!expectedTestRowMultiSet.verify(outputTestRowMultiSet, "expected", "actual")) {
+      System.out.println("*BENCHMARK* " + title + " verify failed" +
+          " for implementation " + mapJoinImplementation +
+          " variation " + testDesc.vectorMapJoinVariation + option);
+      expectedTestRowMultiSet.displayDifferences(outputTestRowMultiSet, "expected", "actual");
     } else {
-      System.out.println("*BENCHMARK* verify succeeded for " + mapJoinImplementation);
+      System.out.println("*BENCHMARK* " + title + " verify succeeded" +
+          " for implementation " + mapJoinImplementation +
+          " variation " + testDesc.vectorMapJoinVariation + option);
     }
   }
 }
\ No newline at end of file
diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java
index 09dcb83..3ce061d 100644
--- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/CheckFastHashTable.java
@@ -30,11 +30,15 @@ import junit.framework.TestCase;
 
 import org.apache.hadoop.hive.ql.exec.JoinUtil;
+import org.apache.hadoop.hive.ql.exec.persistence.MatchTracker;
 import org.apache.hadoop.hive.ql.exec.vector.expressions.StringExpr;
 import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult;
 import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMultiSetResult;
 import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashSetResult;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinNonMatchedIterator;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.serde2.WriteBuffers;
+import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef;
 import org.apache.hadoop.io.BytesWritable;
 import org.apache.hadoop.io.WritableComparator;
 
@@ -197,6 +201,20 @@ public long getKey(int index) {
     return array[index].getValues();
   }
 
+  private
void verifyOne(VectorMapJoinFastLongHashMap map, int index, MatchTracker matchTracker) { + FastLongHashMapElement element = array[index]; + long longKey = element.getKey(); + List values = element.getValues(); + + VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult(); + JoinUtil.JoinResult joinResult = map.lookup(longKey, hashMapResult, matchTracker); + if (joinResult != JoinUtil.JoinResult.MATCH) { + assertTrue(false); + } + + verifyHashMapValues(hashMapResult, values); + } + public void verify(VectorMapJoinFastLongHashMap map) { int mapSize = map.size(); if (mapSize != count) { @@ -204,18 +222,77 @@ public void verify(VectorMapJoinFastLongHashMap map) { } for (int index = 0; index < count; index++) { + verifyOne(map, index, null); + } + } + + private int findKeyInArray(long searchLong) { + + // Brute force search. + for (int index = 0; index < count; index++) { FastLongHashMapElement element = array[index]; - long key = element.getKey(); - List values = element.getValues(); + long longKey = element.getKey(); + if (longKey == searchLong) { + return index; + } + } + return -1; + } - VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult(); - JoinUtil.JoinResult joinResult = map.lookup(key, hashMapResult); - if (joinResult != JoinUtil.JoinResult.MATCH) { - assertTrue(false); + // We assume there have been no reads/lookups before this call. + // And, keys are *UNIQUE*. + public void verifyNonMatched(VectorMapJoinFastLongHashMap map, Random random) + throws HiveException { + int mapSize = map.size(); + if (mapSize != count) { + TestCase.fail("map.size() does not match expected count"); + } + + MatchTracker matchTracker = map.createMatchTracker(); + boolean nonMatched[] = new boolean[mapSize]; + int nonMatchedCount = 0; + for (int index = 0; index < count; index++) { + nonMatched[index] = random.nextBoolean(); + if (!nonMatched[index]) { + verifyOne(map, index, matchTracker); + } else { + nonMatchedCount++; } + } + + boolean returnedNonMatched[] = new boolean[mapSize]; + int returnedNonMatchedCount = 0; + + VectorMapJoinNonMatchedIterator nonMatchedIterator = + map.createNonMatchedIterator(matchTracker); + nonMatchedIterator.init(); + while (nonMatchedIterator.findNextNonMatched()) { + boolean isNull = !nonMatchedIterator.readNonMatchedLongKey(); + if (isNull) { + TestCase.fail("NULL key found in expected keys"); + } + long longKey = nonMatchedIterator.getNonMatchedLongKey(); + int index = findKeyInArray(longKey); + if (index == -1) { + TestCase.fail("non-matched key not found in expected keys"); + } + if (!nonMatched[index]) { + TestCase.fail("non-matched key not one of the expected non-matched keys"); + } + if (returnedNonMatched[index]) { + TestCase.fail("non-matched key already returned"); + } + returnedNonMatched[index] = true; + returnedNonMatchedCount++; + VectorMapJoinHashMapResult hashMapResult = nonMatchedIterator.getNonMatchedHashMapResult(); + FastLongHashMapElement element = array[index]; + List values = element.getValues(); verifyHashMapValues(hashMapResult, values); } + if (nonMatchedCount != returnedNonMatchedCount) { + TestCase.fail("non-matched key count mismatch"); + } } } @@ -247,6 +324,11 @@ public int getValueCount() { public void addValue(byte[] value) { values.add(value); } + + @Override + public String toString() { + return "Key length " + key.length + ", value count " + values.size(); + } } /* @@ -310,6 +392,21 @@ public void add(byte[] key, byte[] value) { return array[index].getValues(); } + private void 
verifyOne(VectorMapJoinFastBytesHashMap map, int index, + MatchTracker matchTracker) { + FastBytesHashMapElement element = array[index]; + byte[] key = element.getKey(); + List values = element.getValues(); + + VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult(); + JoinUtil.JoinResult joinResult = map.lookup(key, 0, key.length, hashMapResult, matchTracker); + if (joinResult != JoinUtil.JoinResult.MATCH) { + assertTrue(false); + } + + verifyHashMapValues(hashMapResult, values); + } + public void verify(VectorMapJoinFastBytesHashMap map) { int mapSize = map.size(); if (mapSize != count) { @@ -317,18 +414,82 @@ public void verify(VectorMapJoinFastBytesHashMap map) { } for (int index = 0; index < count; index++) { + verifyOne(map, index, null); + } + } + + private int findKeyInArray(byte[] searchKeyBytes, int searchKeyOffset, int searchKeyLength) { + + // Brute force search. + for (int index = 0; index < count; index++) { FastBytesHashMapElement element = array[index]; - byte[] key = element.getKey(); - List values = element.getValues(); + byte[] keyBytes = element.getKey(); + if (keyBytes.length == searchKeyLength && + StringExpr.equal( + keyBytes, 0, keyBytes.length, + searchKeyBytes, searchKeyOffset, searchKeyLength)) { + return index; + } + } + return -1; + } - VectorMapJoinHashMapResult hashMapResult = map.createHashMapResult(); - JoinUtil.JoinResult joinResult = map.lookup(key, 0, key.length, hashMapResult); - if (joinResult != JoinUtil.JoinResult.MATCH) { - assertTrue(false); + // We assume there have been no reads/lookups before this call. + // And, keys are *UNIQUE*. + public void verifyNonMatched(VectorMapJoinFastBytesHashMap map, Random random) + throws HiveException { + int mapSize = map.size(); + if (mapSize != count) { + TestCase.fail("map.size() does not match expected count"); + } + + MatchTracker matchTracker = map.createMatchTracker(); + boolean nonMatched[] = new boolean[mapSize]; + int nonMatchedCount = 0; + for (int index = 0; index < count; index++) { + nonMatched[index] = random.nextBoolean(); + if (!nonMatched[index]) { + verifyOne(map, index, matchTracker); + } else { + nonMatchedCount++; } + } + + boolean returnedNonMatched[] = new boolean[mapSize]; + int returnedNonMatchedCount = 0; + VectorMapJoinNonMatchedIterator nonMatchedIterator = + map.createNonMatchedIterator(matchTracker); + nonMatchedIterator.init(); + + while (nonMatchedIterator.findNextNonMatched()) { + boolean isNull = !nonMatchedIterator.readNonMatchedBytesKey();; + if (isNull) { + TestCase.fail("NULL key found in expected keys"); + } + byte[] keyBytes = nonMatchedIterator.getNonMatchedBytes(); + int keyOffset = nonMatchedIterator.getNonMatchedBytesOffset(); + int keyLength = nonMatchedIterator.getNonMatchedBytesLength(); + int index = findKeyInArray(keyBytes, keyOffset, keyLength); + if (index == -1) { + TestCase.fail("non-matched key not found in expected keys"); + } + if (!nonMatched[index]) { + TestCase.fail("non-matched key not one of the expected non-matched keys"); + } + if (returnedNonMatched[index]) { + TestCase.fail("non-matched key already returned"); + } + returnedNonMatched[index] = true; + returnedNonMatchedCount++; + VectorMapJoinHashMapResult hashMapResult = nonMatchedIterator.getNonMatchedHashMapResult(); + FastBytesHashMapElement element = array[index]; + List values = element.getValues(); verifyHashMapValues(hashMapResult, values); } + if (nonMatchedCount != returnedNonMatchedCount) { + TestCase.fail("non-matched key count mismatch"); + } } } diff --git 
new file mode 100644
index 0000000..6833553
--- /dev/null
+++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashMapNonMatched.java
@@ -0,0 +1,171 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast;
+
+import static org.junit.Assert.assertTrue;
+
+import java.io.IOException;
+import java.util.Random;
+
+import org.apache.hadoop.hive.ql.exec.JoinUtil;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.CheckFastHashTable.VerifyFastBytesHashMap;
+import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.junit.Test;
+
+/*
+ * Non-matched key tests for the multi-key value hash map optimized for vector map join.
+ *
+ * The key is uninterpreted bytes.
+ */
+public class TestVectorMapJoinFastBytesHashMapNonMatched extends CommonFastHashTable {
+
+  @Test
+  public void testOneKey() throws Exception {
+    random = new Random(82733);
+
+    VectorMapJoinFastMultiKeyHashMap map =
+        new VectorMapJoinFastMultiKeyHashMap(
+            false,CAPACITY, LOAD_FACTOR, WB_SIZE, -1);
+
+    VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap();
+
+    byte[] key = new byte[random.nextInt(MAX_KEY_LENGTH)];
+    random.nextBytes(key);
+    byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)];
+    random.nextBytes(value);
+
+    map.testPutRow(key, value);
+    verifyTable.add(key, value);
+
+    // Second value.
+    value = new byte[random.nextInt(MAX_VALUE_LENGTH)];
+    random.nextBytes(value);
+    map.testPutRow(key, value);
+    verifyTable.add(key, value);
+
+    // Third value.
+    value = new byte[random.nextInt(MAX_VALUE_LENGTH)];
+    random.nextBytes(value);
+    map.testPutRow(key, value);
+    verifyTable.add(key, value);
+
+    verifyTable.verifyNonMatched(map, random);
+  }
+
+  @Test
+  public void testMultipleKeysSingleValue() throws Exception {
+    random = new Random(29383);
+
+    VectorMapJoinFastMultiKeyHashMap map =
+        new VectorMapJoinFastMultiKeyHashMap(
+            false,CAPACITY, LOAD_FACTOR, WB_SIZE, -1);
+
+    VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap();
+
+    int keyCount = 100 + random.nextInt(1000);
+    for (int i = 0; i < keyCount; i++) {
+      byte[] key;
+      while (true) {
+        key = new byte[random.nextInt(MAX_KEY_LENGTH)];
+        random.nextBytes(key);
+        if (!verifyTable.contains(key)) {
+          // Unique keys for this test.
+          break;
+        }
+      }
+      byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)];
+      random.nextBytes(value);
+
+      map.testPutRow(key, value);
+      verifyTable.add(key, value);
+    }
+
+    verifyTable.verifyNonMatched(map, random);
+  }
+
+  public void addAndVerifyMultipleKeyMultipleValue(int keyCount,
+      VectorMapJoinFastMultiKeyHashMap map, VerifyFastBytesHashMap verifyTable)
+          throws HiveException, IOException {
+    addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable, MAX_KEY_LENGTH, -1);
+  }
+
+  public void addAndVerifyMultipleKeyMultipleValue(int keyCount,
+      VectorMapJoinFastMultiKeyHashMap map, VerifyFastBytesHashMap verifyTable,
+      int maxKeyLength, int fixedValueLength)
+          throws HiveException, IOException {
+    for (int i = 0; i < keyCount; i++) {
+      byte[] value;
+      if (fixedValueLength == -1) {
+        value = new byte[generateLargeCount() - 1];
+      } else {
+        value = new byte[fixedValueLength];
+      }
+      random.nextBytes(value);
+
+      // Add a new key or add a value to an existing key?
+      if (random.nextBoolean() || verifyTable.getCount() == 0) {
+        byte[] key;
+        while (true) {
+          key = new byte[random.nextInt(maxKeyLength)];
+          random.nextBytes(key);
+          if (!verifyTable.contains(key)) {
+            // Unique keys for this test.
+            break;
+          }
+        }
+
+        map.testPutRow(key, value);
+        verifyTable.add(key, value);
+      } else {
+        byte[] randomExistingKey = verifyTable.addRandomExisting(value, random);
+        map.testPutRow(randomExistingKey, value);
+      }
+    }
+
+    verifyTable.verifyNonMatched(map, random);
+  }
+
+  @Test
+  public void testMultipleKeysMultipleValue() throws Exception {
+    random = new Random(9332);
+
+    // Use a large capacity that doesn't require expansion, yet.
+    VectorMapJoinFastMultiKeyHashMap map =
+        new VectorMapJoinFastMultiKeyHashMap(
+            false,LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE, -1);
+
+    VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap();
+
+    int keyCount = 100;
+    addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable);
+  }
+
+  @Test
+  public void testReallyBig() throws Exception {
+    random = new Random(42662);
+
+    // Use a large capacity that doesn't require expansion, yet.
+ VectorMapJoinFastMultiKeyHashMap map = + new VectorMapJoinFastMultiKeyHashMap( + false,LARGE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE, -1); + + VerifyFastBytesHashMap verifyTable = new VerifyFastBytesHashMap(); + + int keyCount = 100; + addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable); + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashSet.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashSet.java index cbd77d1..fb8be91 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashSet.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastBytesHashSet.java @@ -37,7 +37,7 @@ public void testOneKey() throws Exception { VectorMapJoinFastMultiKeyHashSet map = new VectorMapJoinFastMultiKeyHashSet( - false,CAPACITY, LOAD_FACTOR, WB_SIZE, -1); + CAPACITY, LOAD_FACTOR, WB_SIZE, -1); VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet(); @@ -65,7 +65,7 @@ public void testMultipleKeysSingleValue() throws Exception { VectorMapJoinFastMultiKeyHashSet map = new VectorMapJoinFastMultiKeyHashSet( - false,CAPACITY, LOAD_FACTOR, WB_SIZE, -1); + CAPACITY, LOAD_FACTOR, WB_SIZE, -1); VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet(); @@ -91,7 +91,7 @@ public void testGetNonExistent() throws Exception { VectorMapJoinFastMultiKeyHashSet map = new VectorMapJoinFastMultiKeyHashSet( - false,CAPACITY, LOAD_FACTOR, WB_SIZE, -1); + CAPACITY, LOAD_FACTOR, WB_SIZE, -1); VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet(); @@ -125,7 +125,7 @@ public void testFullMap() throws Exception { // Make sure the map does not expand; should be able to find space. VectorMapJoinFastMultiKeyHashSet map = - new VectorMapJoinFastMultiKeyHashSet(false,CAPACITY, 1f, WB_SIZE, -1); + new VectorMapJoinFastMultiKeyHashSet(CAPACITY, 1f, WB_SIZE, -1); VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet(); @@ -167,7 +167,7 @@ public void testExpand() throws Exception { // Start with capacity 1; make sure we expand on every put. VectorMapJoinFastMultiKeyHashSet map = - new VectorMapJoinFastMultiKeyHashSet(false,1, 0.0000001f, WB_SIZE, -1); + new VectorMapJoinFastMultiKeyHashSet(1, 0.0000001f, WB_SIZE, -1); VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet(); @@ -227,7 +227,7 @@ public void testMultipleKeysMultipleValue() throws Exception { // Use a large capacity that doesn't require expansion, yet. VectorMapJoinFastMultiKeyHashSet map = new VectorMapJoinFastMultiKeyHashSet( - false,LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE, -1); + LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE, -1); VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet(); @@ -242,7 +242,7 @@ public void testLargeAndExpand() throws Exception { // Use a large capacity that doesn't require expansion, yet. 
VectorMapJoinFastMultiKeyHashSet map = new VectorMapJoinFastMultiKeyHashSet( - false,MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE, -1); + MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE, -1); VerifyFastBytesHashSet verifyTable = new VerifyFastBytesHashSet(); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMapNonMatched.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMapNonMatched.java new file mode 100644 index 0000000..8e53501 --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashMapNonMatched.java @@ -0,0 +1,198 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast; + +import java.io.IOException; +import java.util.Random; + +import org.apache.hadoop.hive.ql.exec.JoinUtil; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.hashtable.VectorMapJoinHashMapResult; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.CheckFastHashTable.VerifyFastLongHashMap; +import org.apache.hadoop.hive.ql.exec.vector.mapjoin.fast.VectorMapJoinFastLongHashMap; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.VectorMapJoinDesc.HashTableKeyType; +import org.junit.Test; + +import static org.junit.Assert.*; + +public class TestVectorMapJoinFastLongHashMapNonMatched extends CommonFastHashTable { + + @Test + public void testOneKey() throws Exception { + random = new Random(33221); + + VectorMapJoinFastLongHashMap map = + new VectorMapJoinFastLongHashMap( + false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE, -1); + + VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap(); + + long key = random.nextLong(); + byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + + map.testPutRow(key, value); + verifyTable.add(key, value); + + // Second value. + value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + map.testPutRow(key, value); + verifyTable.add(key, value); + + // Third value. 
+ value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + map.testPutRow(key, value); + verifyTable.add(key, value); + + verifyTable.verifyNonMatched(map, random); + } + + @Test + public void testMultipleKeysSingleValue() throws Exception { + random = new Random(900); + + VectorMapJoinFastLongHashMap map = + new VectorMapJoinFastLongHashMap( + false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE, -1); + + VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap(); + + int keyCount = 100 + random.nextInt(1000); + for (int i = 0; i < keyCount; i++) { + long key; + while (true) { + key = random.nextLong(); + if (!verifyTable.contains(key)) { + // Unique keys for this test. + break; + } + } + byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + + map.testPutRow(key, value); + verifyTable.add(key, value); + } + + verifyTable.verifyNonMatched(map, random); + } + + @Test + public void testExpand() throws Exception { + random = new Random(5227); + + // Start with capacity 1; make sure we expand on every put. + VectorMapJoinFastLongHashMap map = + new VectorMapJoinFastLongHashMap( + false, false, HashTableKeyType.LONG, 1, 0.0000001f, WB_SIZE, -1); + + VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap(); + + for (int i = 0; i < 18; ++i) { + long key; + while (true) { + key = random.nextLong(); + if (!verifyTable.contains(key)) { + // Unique keys for this test. + break; + } + } + byte[] value = new byte[random.nextInt(MAX_VALUE_LENGTH)]; + random.nextBytes(value); + + map.testPutRow(key, value); + verifyTable.add(key, value); + } + + verifyTable.verifyNonMatched(map, random); + // assertEquals(1 << 18, map.getCapacity()); + } + + public void addAndVerifyMultipleKeyMultipleValue(int keyCount, + VectorMapJoinFastLongHashMap map, VerifyFastLongHashMap verifyTable) + throws HiveException, IOException { + addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable, -1); + } + + public void addAndVerifyMultipleKeyMultipleValue(int keyCount, + VectorMapJoinFastLongHashMap map, VerifyFastLongHashMap verifyTable, int fixedValueLength) + throws HiveException, IOException { + for (int i = 0; i < keyCount; i++) { + byte[] value; + if (fixedValueLength == -1) { + value = new byte[generateLargeCount() - 1]; + } else { + value = new byte[fixedValueLength]; + } + random.nextBytes(value); + + // Add a new key or add a value to an existing key? + if (random.nextBoolean() || verifyTable.getCount() == 0) { + long key; + while (true) { + key = random.nextLong(); + if (!verifyTable.contains(key)) { + // Unique keys for this test. + break; + } + } + + map.testPutRow(key, value); + verifyTable.add(key, value); + } else { + long randomExistingKey = verifyTable.addRandomExisting(value, random); + map.testPutRow(randomExistingKey, value); + } + } + + verifyTable.verifyNonMatched(map, random); + } + + @Test + public void testMultipleKeysMultipleValue() throws Exception { + random = new Random(8); + + // Use a large capacity that doesn't require expansion, yet. + VectorMapJoinFastLongHashMap map = + new VectorMapJoinFastLongHashMap( + false, false, HashTableKeyType.LONG, LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE, -1); + + VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap(); + + int keyCount = 100; + addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable); + } + + @Test + public void testLargeAndExpand() throws Exception { + random = new Random(20); + + // Use a large capacity that doesn't require expansion, yet. 
+ VectorMapJoinFastLongHashMap map = + new VectorMapJoinFastLongHashMap( + false, false, HashTableKeyType.LONG, MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE, -1); + + VerifyFastLongHashMap verifyTable = new VerifyFastLongHashMap(); + + int keyCount = 100; + addAndVerifyMultipleKeyMultipleValue(keyCount, map, verifyTable); + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashSet.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashSet.java index bbb5da0..f64d180 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashSet.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/TestVectorMapJoinFastLongHashSet.java @@ -39,7 +39,7 @@ public void testOneKey() throws Exception { VectorMapJoinFastLongHashSet map = new VectorMapJoinFastLongHashSet( - false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE, -1); + false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE, -1); VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet(); @@ -66,7 +66,7 @@ public void testMultipleKeysSingleValue() throws Exception { VectorMapJoinFastLongHashSet map = new VectorMapJoinFastLongHashSet( - false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE, -1); + false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE, -1); VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet(); @@ -94,7 +94,7 @@ public void testGetNonExistent() throws Exception { VectorMapJoinFastLongHashSet map = new VectorMapJoinFastLongHashSet( - false, false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE, -1); + false, HashTableKeyType.LONG, CAPACITY, LOAD_FACTOR, WB_SIZE, -1); VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet(); @@ -126,7 +126,7 @@ public void testFullMap() throws Exception { // Make sure the map does not expand; should be able to find space. VectorMapJoinFastLongHashSet map = new VectorMapJoinFastLongHashSet( - false, false, HashTableKeyType.LONG, CAPACITY, 1f, WB_SIZE, -1); + false, HashTableKeyType.LONG, CAPACITY, 1f, WB_SIZE, -1); VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet(); @@ -167,7 +167,7 @@ public void testExpand() throws Exception { // Start with capacity 1; make sure we expand on every put. VectorMapJoinFastLongHashSet map = new VectorMapJoinFastLongHashSet( - false, false, HashTableKeyType.LONG, 1, 0.0000001f, WB_SIZE, -1); + false, HashTableKeyType.LONG, 1, 0.0000001f, WB_SIZE, -1); VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet(); @@ -225,7 +225,7 @@ public void testMultipleKeysMultipleValue() throws Exception { // Use a large capacity that doesn't require expansion, yet. VectorMapJoinFastLongHashSet map = new VectorMapJoinFastLongHashSet( - false, false, HashTableKeyType.LONG, LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE, -1); + false, HashTableKeyType.LONG, LARGE_CAPACITY, LOAD_FACTOR, LARGE_WB_SIZE, -1); VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet(); @@ -240,7 +240,7 @@ public void testLargeAndExpand() throws Exception { // Use a large capacity that doesn't require expansion, yet. 
VectorMapJoinFastLongHashSet map = new VectorMapJoinFastLongHashSet( - false, false, HashTableKeyType.LONG, MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE, -1); + false, HashTableKeyType.LONG, MODERATE_CAPACITY, LOAD_FACTOR, MODERATE_WB_SIZE, -1); VerifyFastLongHashSet verifyTable = new VerifyFastLongHashSet(); diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VerifyFastRow.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VerifyFastRow.java index 4412425..ff993aa 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VerifyFastRow.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/mapjoin/fast/VerifyFastRow.java @@ -600,8 +600,6 @@ public static Object deserializeReadComplexType(DeserializeRead deserializeRead, return getComplexField(deserializeRead, typeInfo); } - static int fake = 0; - private static Object getComplexField(DeserializeRead deserializeRead, TypeInfo typeInfo) throws IOException { switch (typeInfo.getCategory()) { diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorBatchGenerator.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorBatchGenerator.java index 793a676..ab1a829 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorBatchGenerator.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorBatchGenerator.java @@ -23,8 +23,10 @@ import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType.GenerateCategory; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; @@ -109,15 +111,25 @@ public static GenerateCategory generateCategoryFromPrimitiveCategory(PrimitiveCa } private GenerateCategory category; + private boolean allowNulls; public GenerateType(GenerateCategory category) { this.category = category; } + public GenerateType(GenerateCategory category, boolean allowNulls) { + this.category = category; + this.allowNulls = allowNulls; + } + public GenerateCategory getCategory() { return category; } + public boolean getAllowNulls() { + return allowNulls; + } + /* * BOOLEAN .. LONG: Min and max. 
*/ @@ -189,16 +201,24 @@ public void assignColumnVectors(VectorizedRowBatch batch, int columnNum, break; case STRING: + case CHAR: + case VARCHAR: + case BINARY: colVector = new BytesColumnVector(); break; - // UNDONE - case DATE: case TIMESTAMP: - case BINARY: + colVector = new TimestampColumnVector(); + break; + case DECIMAL: - case VARCHAR: - case CHAR: + colVector = new DecimalColumnVector(38, 18); + break; + + // UNDONE + case DATE: + + case LIST: case MAP: case STRUCT: diff --git ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorColumnGroupGenerator.java ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorColumnGroupGenerator.java index 1064b19..22a1cd8 100644 --- ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorColumnGroupGenerator.java +++ ql/src/test/org/apache/hadoop/hive/ql/exec/vector/util/batchgen/VectorColumnGroupGenerator.java @@ -22,15 +22,19 @@ import java.util.Arrays; import java.util.Random; +import org.apache.hadoop.hive.common.type.HiveChar; +import org.apache.hadoop.hive.common.type.HiveVarchar; import org.apache.hadoop.hive.common.type.RandomTypeUtil; import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; import org.apache.hadoop.hive.ql.exec.vector.ColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector; import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType; import org.apache.hadoop.hive.ql.exec.vector.util.batchgen.VectorBatchGenerator.GenerateType.GenerateCategory; +import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.io.BooleanWritable; import org.apache.hadoop.io.Text; @@ -39,6 +43,7 @@ private GenerateType[] generateTypes; private int[] columnNums; private Object[] arrays; + private boolean[][] isNullArrays; public VectorColumnGroupGenerator(int columnNum, GenerateType generateType) { columnNums = new int[] {columnNum}; @@ -61,6 +66,7 @@ public VectorColumnGroupGenerator(int startColumnNum, GenerateType[] generateTyp private void allocateArrays(int size) { arrays = new Object[generateTypes.length]; + isNullArrays = new boolean[generateTypes.length][]; for (int i = 0; i < generateTypes.length; i++) { GenerateType generateType = generateTypes[i]; GenerateCategory category = generateType.getCategory(); @@ -90,24 +96,34 @@ private void allocateArrays(int size) { case STRING: array = new String[size]; break; + case BINARY: + array = new byte[size][]; + break; case TIMESTAMP: array = new Timestamp[size]; break; + case CHAR: + array = new HiveChar[size]; + break; + case VARCHAR: + array = new HiveVarchar[size]; + break; + case DECIMAL: + array = new HiveDecimalWritable[size]; + break; // UNDONE case DATE: - case BINARY: - case DECIMAL: - case VARCHAR: - case CHAR: case LIST: case MAP: case STRUCT: case UNION: default: + throw new RuntimeException("Unexpected generate category " + category); } arrays[i] = array; + isNullArrays[i] = new boolean[size]; } } @@ -141,16 +157,24 @@ public void clearColumnValueArrays() { case STRING: Arrays.fill(((String[]) array), null); break; + case BINARY: + Arrays.fill(((byte[][]) array), null); + break; case TIMESTAMP: Arrays.fill(((Timestamp[]) array), null); break; + case CHAR: + Arrays.fill(((HiveChar[]) array), 
null); + break; + case VARCHAR: + Arrays.fill(((HiveVarchar[]) array), null); + break; + case DECIMAL: + Arrays.fill(((HiveDecimalWritable[]) array), null); + break; // UNDONE case DATE: - case BINARY: - case DECIMAL: - case VARCHAR: - case CHAR: case LIST: case MAP: @@ -170,6 +194,11 @@ public void generateRowValues(int rowIndex, Random random) { private void generateRowColumnValue(int rowIndex, int columnIndex, Random random) { GenerateType generateType = generateTypes[columnIndex]; GenerateCategory category = generateType.getCategory(); + boolean allowNulls = generateType.getAllowNulls(); + if (allowNulls && random.nextInt(100) < 5) { + isNullArrays[columnIndex][rowIndex] = true; + return; + } Object array = arrays[columnIndex]; switch (category) { case BOOLEAN: @@ -230,6 +259,13 @@ private void generateRowColumnValue(int rowIndex, int columnIndex, Random random } break; + case BINARY: + { + byte[] value = RandomTypeUtil.getRandBinary(random, 10); + ((byte[][]) array)[rowIndex] = value; + } + break; + case TIMESTAMP: { Timestamp value = RandomTypeUtil.getRandTimestamp(random); @@ -237,14 +273,36 @@ private void generateRowColumnValue(int rowIndex, int columnIndex, Random random } break; + case CHAR: + { + // UNDONE: Use CharTypeInfo.maxLength + HiveChar value = + new HiveChar(RandomTypeUtil.getRandString(random), 10); + ((HiveChar[]) array)[rowIndex] = value; + } + break; + + case VARCHAR: + { + // UNDONE: Use VarcharTypeInfo.maxLength + HiveVarchar value = + new HiveVarchar(RandomTypeUtil.getRandString(random), 10); + ((HiveVarchar[]) array)[rowIndex] = value; + } + break; + + case DECIMAL: + { + HiveDecimalWritable value = + new HiveDecimalWritable(RandomTypeUtil.getRandHiveDecimal(random)); + ((HiveDecimalWritable[]) array)[rowIndex] = value; + } + break; + // UNDONE case DATE: // UNDONE: Needed to longTest? 
- case BINARY: - case DECIMAL: - case VARCHAR: - case CHAR: case LIST: case MAP: @@ -263,7 +321,15 @@ public void fillDownRowValues(int rowIndex, int seriesCount, Random random) { private void fillDownRowColumnValue(int rowIndex, int columnIndex, int seriesCount, Random random) { GenerateType generateType = generateTypes[columnIndex]; GenerateCategory category = generateType.getCategory(); + boolean allowNulls = generateType.getAllowNulls(); Object array = arrays[columnIndex]; + boolean[] isNull = isNullArrays[columnIndex]; + if (allowNulls && isNull[rowIndex]) { + for (int i = 1; i < seriesCount; i++) { + isNull[rowIndex + i] = true; + } + return; + } switch (category) { case BOOLEAN: { @@ -337,6 +403,15 @@ private void fillDownRowColumnValue(int rowIndex, int columnIndex, int seriesCou } } break; + case BINARY: + { + byte[][] byteArrayArray = ((byte[][]) array); + byte[] value = byteArrayArray[rowIndex]; + for (int i = 1; i < seriesCount; i++) { + byteArrayArray[rowIndex + i] = value; + } + } + break; case TIMESTAMP: { Timestamp[] timestampArray = ((Timestamp[]) array); @@ -346,15 +421,37 @@ private void fillDownRowColumnValue(int rowIndex, int columnIndex, int seriesCou } } break; + case CHAR: + { + HiveChar[] hiveCharArray = ((HiveChar[]) array); + HiveChar value = hiveCharArray[rowIndex]; + for (int i = 1; i < seriesCount; i++) { + hiveCharArray[rowIndex + i] = value; + } + } + break; + case VARCHAR: + { + HiveVarchar[] hiveVarcharArray = ((HiveVarchar[]) array); + HiveVarchar value = hiveVarcharArray[rowIndex]; + for (int i = 1; i < seriesCount; i++) { + hiveVarcharArray[rowIndex + i] = value; + } + } + break; + case DECIMAL: + { + HiveDecimalWritable[] hiveDecimalWritableArray = ((HiveDecimalWritable[]) array); + HiveDecimalWritable value = hiveDecimalWritableArray[rowIndex]; + for (int i = 1; i < seriesCount; i++) { + hiveDecimalWritableArray[rowIndex + i] = value; + } + } + break; // UNDONE case DATE: - case BINARY: - case DECIMAL: - case VARCHAR: - case CHAR: - case LIST: case MAP: case STRUCT: @@ -389,6 +486,16 @@ private void populateBatchColumn(VectorizedRowBatch batch, int logicalColumnInde GenerateType generateType = generateTypes[logicalColumnIndex]; GenerateCategory category = generateType.getCategory(); + boolean allowNulls = generateType.getAllowNulls(); + boolean[] isNull = isNullArrays[logicalColumnIndex]; + if (allowNulls) { + for (int i = 0; i < size; i++) { + if (isNull[i]) { + colVector.isNull[i] = true; + colVector.noNulls = false; + } + } + } Object array = arrays[logicalColumnIndex]; switch (category) { case BOOLEAN: @@ -396,7 +503,11 @@ private void populateBatchColumn(VectorizedRowBatch batch, int logicalColumnInde boolean[] booleanArray = ((boolean[]) array); long[] vector = ((LongColumnVector) colVector).vector; for (int i = 0; i < size; i++) { - vector[i] = (booleanArray[i] ? 1 : 0); + if (isNull[i]) { + vector[i] = 0; + } else { + vector[i] = (booleanArray[i] ? 
1 : 0); + } } } break; @@ -405,7 +516,11 @@ private void populateBatchColumn(VectorizedRowBatch batch, int logicalColumnInde byte[] byteArray = ((byte[]) array); long[] vector = ((LongColumnVector) colVector).vector; for (int i = 0; i < size; i++) { - vector[i] = byteArray[i]; + if (isNull[i]) { + vector[i] = 0; + } else { + vector[i] = byteArray[i]; + } } } break; @@ -414,7 +529,11 @@ private void populateBatchColumn(VectorizedRowBatch batch, int logicalColumnInde short[] shortArray = ((short[]) array); long[] vector = ((LongColumnVector) colVector).vector; for (int i = 0; i < size; i++) { - vector[i] = shortArray[i]; + if (isNull[i]) { + vector[i] = 0; + } else { + vector[i] = shortArray[i]; + } } } break; @@ -423,7 +542,11 @@ private void populateBatchColumn(VectorizedRowBatch batch, int logicalColumnInde int[] intArray = ((int[]) array); long[] vector = ((LongColumnVector) colVector).vector; for (int i = 0; i < size; i++) { - vector[i] = intArray[i]; + if (isNull[i]) { + vector[i] = 0; + } else { + vector[i] = intArray[i]; + } } } break; @@ -432,7 +555,11 @@ private void populateBatchColumn(VectorizedRowBatch batch, int logicalColumnInde long[] longArray = ((long[]) array); long[] vector = ((LongColumnVector) colVector).vector; for (int i = 0; i < size; i++) { - vector[i] = longArray[i]; + if (isNull[i]) { + vector[i] = 0; + } else { + vector[i] = longArray[i]; + } } } break; @@ -441,7 +568,11 @@ private void populateBatchColumn(VectorizedRowBatch batch, int logicalColumnInde float[] floatArray = ((float[]) array); double[] vector = ((DoubleColumnVector) colVector).vector; for (int i = 0; i < size; i++) { - vector[i] = floatArray[i]; + if (isNull[i]) { + vector[i] = 0; + } else { + vector[i] = floatArray[i]; + } } } break; @@ -450,7 +581,11 @@ private void populateBatchColumn(VectorizedRowBatch batch, int logicalColumnInde double[] doubleArray = ((double[]) array); double[] vector = ((DoubleColumnVector) colVector).vector; for (int i = 0; i < size; i++) { - vector[i] = doubleArray[i]; + if (isNull[i]) { + vector[i] = 0; + } else { + vector[i] = doubleArray[i]; + } } } break; @@ -459,8 +594,22 @@ private void populateBatchColumn(VectorizedRowBatch batch, int logicalColumnInde String[] stringArray = ((String[]) array); BytesColumnVector bytesColVec = ((BytesColumnVector) colVector); for (int i = 0; i < size; i++) { - byte[] bytes = stringArray[i].getBytes(); - bytesColVec.setVal(i, bytes); + if (!isNull[i]) { + byte[] bytes = stringArray[i].getBytes(); + bytesColVec.setVal(i, bytes); + } + } + } + break; + case BINARY: + { + byte[][] byteArrayArray = ((byte[][]) array); + BytesColumnVector bytesColVec = ((BytesColumnVector) colVector); + for (int i = 0; i < size; i++) { + if (!isNull[i]) { + byte[] bytes = byteArrayArray[i]; + bytesColVec.setVal(i, bytes); + } } } break; @@ -469,8 +618,46 @@ private void populateBatchColumn(VectorizedRowBatch batch, int logicalColumnInde Timestamp[] timestampArray = ((Timestamp[]) array); TimestampColumnVector timestampColVec = ((TimestampColumnVector) colVector); for (int i = 0; i < size; i++) { - Timestamp timestamp = timestampArray[i]; - timestampColVec.set(i, timestamp); + if (!isNull[i]) { + Timestamp timestamp = timestampArray[i]; + timestampColVec.set(i, timestamp); + } + } + } + break; + case CHAR: + { + HiveChar[] hiveCharArray = ((HiveChar[]) array); + BytesColumnVector bytesColVec = ((BytesColumnVector) colVector); + for (int i = 0; i < size; i++) { + if (!isNull[i]) { + byte[] bytes = hiveCharArray[i].getValue().getBytes(); + 
bytesColVec.setVal(i, bytes);
+          }
+        }
+      }
+      break;
+    case VARCHAR:
+      {
+        HiveVarchar[] hiveVarcharArray = ((HiveVarchar[]) array);
+        BytesColumnVector bytesColVec = ((BytesColumnVector) colVector);
+        for (int i = 0; i < size; i++) {
+          if (!isNull[i]) {
+            byte[] bytes = hiveVarcharArray[i].getValue().getBytes();
+            bytesColVec.setVal(i, bytes);
+          }
+        }
+      }
+      break;
+    case DECIMAL:
+      {
+        HiveDecimalWritable[] hiveDecimalWritableArray = ((HiveDecimalWritable[]) array);
+        DecimalColumnVector decimalColVec = ((DecimalColumnVector) colVector);
+        for (int i = 0; i < size; i++) {
+          if (!isNull[i]) {
+            HiveDecimalWritable decWritable = hiveDecimalWritableArray[i];
+            decimalColVec.set(i, decWritable);
+          }
         }
       }
       break;
@@ -479,16 +666,12 @@
 
     case DATE:
 
-    case BINARY:
-    case DECIMAL:
-    case VARCHAR:
-    case CHAR:
-
     case LIST:
     case MAP:
     case STRUCT:
     case UNION:
     default:
+      throw new RuntimeException("Unexpected generate category " + category);
     }
   }
 }
\ No newline at end of file
diff --git ql/src/test/queries/clientpositive/auto_join_filters.q ql/src/test/queries/clientpositive/auto_join_filters.q
index a44ffb3..f93c494 100644
--- ql/src/test/queries/clientpositive/auto_join_filters.q
+++ ql/src/test/queries/clientpositive/auto_join_filters.q
@@ -7,7 +7,12 @@ LOAD DATA LOCAL INPATH '../../data/files/in3.txt' INTO TABLE myinput1;
 SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
 SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
 SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+SET hive.mapjoin.full.outer=false;
 SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+SET hive.mapjoin.full.outer=true;
+SET hive.merge.nway.joins=false;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
+SET hive.merge.nway.joins=true;
 
 SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
 SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
@@ -24,10 +29,18 @@ SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN m
 SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
 SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value;
 
+SET hive.mapjoin.full.outer=false;
+SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL
OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SET hive.merge.nway.joins=true; SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value); SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value); @@ -51,7 +64,12 @@ SET hive.input.format = org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SET hive.mapjoin.full.outer=false; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER 
JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SET hive.merge.nway.joins=true; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; @@ -68,10 +86,18 @@ SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN m SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SET hive.mapjoin.full.outer=false; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +SET hive.merge.nway.joins=true; SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value); SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND 
b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value); diff --git ql/src/test/queries/clientpositive/auto_join_nulls.q ql/src/test/queries/clientpositive/auto_join_nulls.q index 279fd32..c7d5770 100644 --- ql/src/test/queries/clientpositive/auto_join_nulls.q +++ ql/src/test/queries/clientpositive/auto_join_nulls.q @@ -19,10 +19,18 @@ SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN m SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value; +SET hive.mapjoin.full.outer=false; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value and a.key=b.key; +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value; +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value and a.key=b.key; +SET hive.merge.nway.joins=true; SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value); SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value); diff --git ql/src/test/queries/clientpositive/correlationoptimizer1.q ql/src/test/queries/clientpositive/correlationoptimizer1.q index d16904e..3f90077 100644 --- ql/src/test/queries/clientpositive/correlationoptimizer1.q +++ ql/src/test/queries/clientpositive/correlationoptimizer1.q @@ -214,6 +214,7 @@ set hive.optimize.correlation=false; -- they share the same key. Because those keys with a null value are not grouped -- in the output of the Full Outer Join, we cannot use a single MR to execute -- these two operators. 
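+-- The FULL OUTER JOIN query below is run twice: first with hive.mapjoin.full.outer=false
+-- to keep the reduce-side join plan, then with FULL OUTER MapJoin enabled.
+-- hive.merge.nway.joins is turned off for the MapJoin runs, since the FULL OUTER
+-- MapJoin path is not merged into n-way join operators.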
+SET hive.mapjoin.full.outer=false; EXPLAIN SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT x.key AS key, count(1) AS cnt @@ -225,7 +226,35 @@ FROM (SELECT x.key AS key, count(1) AS cnt FROM src1 x FULL OUTER JOIN src y ON (x.key = y.key) GROUP BY x.key) tmp; +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; +EXPLAIN +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT x.key AS key, count(1) AS cnt + FROM src1 x FULL OUTER JOIN src y ON (x.key = y.key) + GROUP BY x.key) tmp; + +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT x.key AS key, count(1) AS cnt + FROM src1 x FULL OUTER JOIN src y ON (x.key = y.key) + GROUP BY x.key) tmp; +SET hive.merge.nway.joins=true; + set hive.optimize.correlation=true; +SET hive.mapjoin.full.outer=false; +EXPLAIN +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT x.key AS key, count(1) AS cnt + FROM src1 x FULL OUTER JOIN src y ON (x.key = y.key) + GROUP BY x.key) tmp; + +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT x.key AS key, count(1) AS cnt + FROM src1 x FULL OUTER JOIN src y ON (x.key = y.key) + GROUP BY x.key) tmp; + +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; EXPLAIN SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT x.key AS key, count(1) AS cnt @@ -236,6 +265,7 @@ SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT x.key AS key, count(1) AS cnt FROM src1 x FULL OUTER JOIN src y ON (x.key = y.key) GROUP BY x.key) tmp; +SET hive.merge.nway.joins=true; set hive.auto.convert.join=false; set hive.optimize.correlation=false; diff --git ql/src/test/queries/clientpositive/correlationoptimizer2.q ql/src/test/queries/clientpositive/correlationoptimizer2.q index eeccd24..7480346 100644 --- ql/src/test/queries/clientpositive/correlationoptimizer2.q +++ ql/src/test/queries/clientpositive/correlationoptimizer2.q @@ -94,6 +94,7 @@ FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 set hive.optimize.correlation=false; -- Full Outer Join should be handled. 
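+-- As in correlationoptimizer1.q, each FULL OUTER JOIN query below is verified both with
+-- hive.mapjoin.full.outer=false (reduce-side join) and with FULL OUTER MapJoin enabled
+-- (usually with n-way join merging turned off).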
+SET hive.mapjoin.full.outer=false; EXPLAIN SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 @@ -107,7 +108,39 @@ FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b ON (a.key = b.key)) tmp; +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; +EXPLAIN +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key)) tmp; + +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key)) tmp; +SET hive.merge.nway.joins=true; + set hive.optimize.correlation=true; +SET hive.mapjoin.full.outer=false; +EXPLAIN +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key)) tmp; + +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key)) tmp; + +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; EXPLAIN SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 @@ -120,8 +153,28 @@ FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2 FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b ON (a.key = b.key)) tmp; +SET hive.merge.nway.joins=true; set hive.optimize.correlation=false; + +SET hive.mapjoin.full.outer=false; +EXPLAIN +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT a.key AS key, count(1) AS cnt + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key) + GROUP BY a.key) tmp; + +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT a.key AS key, count(1) AS cnt + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key) + GROUP BY a.key) tmp; + +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; EXPLAIN SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT a.key AS key, count(1) AS cnt @@ -136,11 +189,13 @@ FROM (SELECT a.key AS key, count(1) AS cnt FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b ON
(a.key = b.key) GROUP BY a.key) tmp; +SET hive.merge.nway.joins=true; set hive.optimize.correlation=true; -- After FULL OUTER JOIN, keys with null values are not grouped, right now, -- we have to generate 2 MR jobs for tmp, 1 MR job for a join b and another for the -- GroupByOperator on key. +SET hive.mapjoin.full.outer=false; EXPLAIN SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT a.key AS key, count(1) AS cnt @@ -156,11 +211,30 @@ FROM (SELECT a.key AS key, count(1) AS cnt ON (a.key = b.key) GROUP BY a.key) tmp; +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; +EXPLAIN +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT a.key AS key, count(1) AS cnt + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key) + GROUP BY a.key) tmp; + +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT a.key AS key, count(1) AS cnt + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key) + GROUP BY a.key) tmp; +SET hive.merge.nway.joins=true; + set hive.optimize.correlation=false; -- When Correlation Optimizer is turned off, we need 4 MR jobs. -- When Correlation Optimizer is turned on, the subquery of tmp will be evaluated in -- a single MR job (including the subquery a, the subquery b, and a join b). So, we -- will have 2 MR jobs. +SET hive.mapjoin.full.outer=false; EXPLAIN SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 @@ -174,7 +248,39 @@ FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by z.key) b ON (a.key = b.key)) tmp; +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; +EXPLAIN +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON (x.key = y.key)) a + JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by z.key) b + ON (a.key = b.key)) tmp; + +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON (x.key = y.key)) a + JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by z.key) b + ON (a.key = b.key)) tmp; +SET hive.merge.nway.joins=true; + set hive.optimize.correlation=true; +SET hive.mapjoin.full.outer=false; +EXPLAIN +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON (x.key = y.key)) a + JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by z.key) b + ON (a.key = b.key)) tmp; + +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON (x.key = y.key)) a + JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by z.key) b + ON (a.key = b.key)) tmp; + +SET 
hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; EXPLAIN SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 @@ -187,3 +293,4 @@ FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON (x.key = y.key)) a JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by z.key) b ON (a.key = b.key)) tmp; +SET hive.merge.nway.joins=true; diff --git ql/src/test/queries/clientpositive/correlationoptimizer4.q ql/src/test/queries/clientpositive/correlationoptimizer4.q index 02edeff..69d400c 100644 --- ql/src/test/queries/clientpositive/correlationoptimizer4.q +++ ql/src/test/queries/clientpositive/correlationoptimizer4.q @@ -135,6 +135,19 @@ FROM (SELECT y.key AS key, count(1) AS cnt set hive.optimize.correlation=false; -- This case should not be optimized because after the FULL OUTER JOIN, rows with null keys -- are not grouped. +set hive.auto.convert.join=false; +EXPLAIN +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT y.key AS key, count(1) AS cnt + FROM T2 x FULL OUTER JOIN T1 y ON (x.key = y.key) FULL OUTER JOIN T3 z ON (y.key = z.key) + GROUP BY y.key) tmp; + +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT y.key AS key, count(1) AS cnt + FROM T2 x FULL OUTER JOIN T1 y ON (x.key = y.key) FULL OUTER JOIN T3 z ON (y.key = z.key) + GROUP BY y.key) tmp; + +set hive.auto.convert.join=true; EXPLAIN SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT y.key AS key, count(1) AS cnt @@ -147,6 +160,7 @@ FROM (SELECT y.key AS key, count(1) AS cnt GROUP BY y.key) tmp; set hive.optimize.correlation=true; +set hive.auto.convert.join=false; EXPLAIN SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT y.key AS key, count(1) AS cnt @@ -157,3 +171,15 @@ SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) FROM (SELECT y.key AS key, count(1) AS cnt FROM T2 x FULL OUTER JOIN T1 y ON (x.key = y.key) FULL OUTER JOIN T3 z ON (y.key = z.key) GROUP BY y.key) tmp; + +set hive.auto.convert.join=true; +EXPLAIN +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT y.key AS key, count(1) AS cnt + FROM T2 x FULL OUTER JOIN T1 y ON (x.key = y.key) FULL OUTER JOIN T3 z ON (y.key = z.key) + GROUP BY y.key) tmp; + +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT y.key AS key, count(1) AS cnt + FROM T2 x FULL OUTER JOIN T1 y ON (x.key = y.key) FULL OUTER JOIN T3 z ON (y.key = z.key) + GROUP BY y.key) tmp; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/fullouter_mapjoin_1_optimized.q ql/src/test/queries/clientpositive/fullouter_mapjoin_1_optimized.q new file mode 100644 index 0000000..1e9e971 --- /dev/null +++ ql/src/test/queries/clientpositive/fullouter_mapjoin_1_optimized.q @@ -0,0 +1,444 @@ +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.vectorized.execution.enabled=false; +set hive.mapjoin.hybridgrace.hashtable=false; +set hive.vectorized.execution.mapjoin.native.enabled=false; +set hive.vectorized.execution.mapjoin.native.fast.hashtable.enabled=false; + +set hive.auto.convert.join=true; +SET hive.auto.convert.join.noconditionaltask=true; +set hive.stats.fetch.column.stats=false; + +------------------------------------------------------------------------------------------ +-- FULL OUTER MapJoin variation for the OPTIMIZED hash table implementation (row mode; vectorization disabled above). 
+------------------------------------------------------------------------------------------ + +-- SORT_QUERY_RESULTS + +------------------------------------------------------------------------------------------ +-- SHARED-MEMORY MAPJOIN +------------------------------------------------------------------------------------------ +set hive.optimize.dynamic.partition.hashjoin=false; +SET hive.auto.convert.join.noconditionaltask.size=1000000000; + +------------------------------------------------------------------------------------------ +-- Single LONG key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt; +CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt; + +CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt; +CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt; + +CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt; +CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt; + +CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt; +CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_nonull_txt; + +analyze table fullouter_long_big_1a compute statistics; +analyze table fullouter_long_big_1a compute statistics for columns; +analyze table fullouter_long_big_1a_nonull compute statistics; +analyze table fullouter_long_big_1a_nonull compute statistics for columns; +analyze table fullouter_long_small_1a compute statistics; +analyze table fullouter_long_small_1a compute statistics for columns; +analyze table fullouter_long_small_1a_nonull compute statistics; +analyze table fullouter_long_small_1a_nonull compute statistics for columns; + +-- Do first one with FULL OUTER MapJoin NOT Enabled. +SET hive.mapjoin.full.outer=false; +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SET hive.mapjoin.full.outer=true; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). 
+SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b; + +CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b; + +analyze table fullouter_long_big_1b compute statistics; +analyze table fullouter_long_big_1b compute statistics for columns; +analyze table fullouter_long_small_1b compute statistics; +analyze table fullouter_long_small_1b compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1c(key int, b_string string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c; + +CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18)) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c; + +analyze table fullouter_long_big_1c compute statistics; +analyze table fullouter_long_big_1c compute statistics for columns; +analyze table fullouter_long_small_1c compute statistics; +analyze table fullouter_long_small_1c compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1d(key int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d; + +CREATE TABLE fullouter_long_small_1d(key int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d; + +analyze table fullouter_long_big_1d compute statistics; +analyze table fullouter_long_big_1d compute statistics for columns; +analyze table fullouter_long_small_1d compute statistics; +analyze table fullouter_long_small_1d compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- MULTI-KEY key 
+------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt; +CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt; + +CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt; +CREATE TABLE fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt; + +CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt; +CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt; + +CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt; +CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt; + +analyze table fullouter_multikey_big_1a compute statistics; +analyze table fullouter_multikey_big_1a compute statistics for columns; +analyze table fullouter_multikey_big_1a_nonull compute statistics; +analyze table fullouter_multikey_big_1a_nonull compute statistics for columns; +analyze table fullouter_multikey_small_1a compute statistics; +analyze table fullouter_multikey_small_1a compute statistics for columns; +analyze table fullouter_multikey_small_1a_nonull compute statistics; +analyze table fullouter_multikey_small_1a_nonull compute statistics for columns; + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Big table without NULL key(s). +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Small table without NULL key(s). +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Both Big and Small tables without NULL key(s). 
+SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + + + + +CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt; +CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt; + +CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18)) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt; +CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt; + +analyze table fullouter_multikey_big_1b compute statistics; +analyze table fullouter_multikey_big_1b compute statistics for columns; +analyze table fullouter_multikey_small_1b compute statistics; +analyze table fullouter_multikey_small_1b compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + + +------------------------------------------------------------------------------------------ +-- Single STRING key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_string_big_1a_txt(key string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt; +CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt; + +CREATE TABLE fullouter_string_big_1a_nonull_txt(key string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_nonull_txt; +CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt; + +CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt; +CREATE TABLE fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt; + +CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt; +CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt; + +analyze table fullouter_string_big_1a compute statistics; +analyze table fullouter_string_big_1a compute statistics for columns; 
+analyze table fullouter_string_big_1a_nonull compute statistics; +analyze table fullouter_string_big_1a_nonull compute statistics for columns; +analyze table fullouter_string_small_1a compute statistics; +analyze table fullouter_string_small_1a compute statistics for columns; +analyze table fullouter_string_small_1a_nonull compute statistics; +analyze table fullouter_string_small_1a_nonull compute statistics for columns; + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + + + +------------------------------------------------------------------------------------------ +-- DYNAMIC PARTITION HASH JOIN +------------------------------------------------------------------------------------------ + +set hive.optimize.dynamic.partition.hashjoin=true; + +-- NOTE: Use very small sizes here to skip SHARED MEMORY MapJoin and force usage +-- NOTE: of DYNAMIC PARTITION HASH JOIN instead. +set hive.auto.convert.join.noconditionaltask.size=500; +set hive.exec.reducers.bytes.per.reducer=500; + +------------------------------------------------------------------------------------------ +-- Single LONG key +------------------------------------------------------------------------------------------ + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). 
+EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + + + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + + + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + + + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- Single STRING key +------------------------------------------------------------------------------------------ + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). 
+EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- MULTI-KEY key +------------------------------------------------------------------------------------------ + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Big table without NULL key(s). +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Small table without NULL key(s). +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Both Big and Small tables without NULL key(s). 
+EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + + + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + + + diff --git ql/src/test/queries/clientpositive/join46.q ql/src/test/queries/clientpositive/join46.q index a661c0f..1c819c3 100644 --- ql/src/test/queries/clientpositive/join46.q +++ ql/src/test/queries/clientpositive/join46.q @@ -183,6 +183,7 @@ ON (test1.value=test2.value OR test2.key between 100 and 102)); -- Disjunction with pred on multiple inputs and single inputs (full outer join) +SET hive.mapjoin.full.outer=false; EXPLAIN SELECT * FROM test1 FULL OUTER JOIN test2 @@ -196,7 +197,37 @@ ON (test1.value=test2.value OR test1.key between 100 and 102 OR test2.key between 100 and 102); +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; +EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102); + +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102); +SET hive.merge.nway.joins=true; + -- Disjunction with pred on multiple inputs and left input (full outer join) +SET hive.mapjoin.full.outer=false; +EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102); + +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102); + +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; EXPLAIN SELECT * FROM test1 FULL OUTER JOIN test2 @@ -207,8 +238,23 @@ SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value OR test1.key between 100 and 102); +SET hive.merge.nway.joins=true; -- Disjunction with pred on multiple inputs and right input (full outer join) +SET hive.mapjoin.full.outer=false; +EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102); + +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102); + +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; EXPLAIN SELECT * FROM test1 FULL OUTER JOIN test2 @@ -219,8 +265,10 @@ SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value OR test2.key between 100 and 102); +SET hive.merge.nway.joins=true; -- Keys plus residual (full outer join) +SET hive.mapjoin.full.outer=false; EXPLAIN SELECT * FROM test1 FULL OUTER JOIN test2 @@ -234,7 +282,65 @@ ON (test1.value=test2.value AND (test1.key between 100 and 102 OR test2.key between 100 and 102)); +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; +EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON 
(test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)); + +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)); +SET hive.merge.nway.joins=true; + -- Mixed ( FOJ (ROJ, LOJ) ) +SET hive.mapjoin.full.outer=false; +EXPLAIN +SELECT * +FROM ( + SELECT test1.key AS key1, test1.value AS value1, test1.col_1 AS col_1, + test2.key AS key2, test2.value AS value2, test2.col_2 AS col_2 + FROM test1 RIGHT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq1 +FULL OUTER JOIN ( + SELECT test1.key AS key3, test1.value AS value3, test1.col_1 AS col_3, + test2.key AS key4, test2.value AS value4, test2.col_2 AS col_4 + FROM test1 LEFT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq2 +ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2); + +SELECT * +FROM ( + SELECT test1.key AS key1, test1.value AS value1, test1.col_1 AS col_1, + test2.key AS key2, test2.value AS value2, test2.col_2 AS col_2 + FROM test1 RIGHT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq1 +FULL OUTER JOIN ( + SELECT test1.key AS key3, test1.value AS value3, test1.col_1 AS col_3, + test2.key AS key4, test2.value AS value4, test2.col_2 AS col_4 + FROM test1 LEFT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq2 +ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2); + +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; EXPLAIN SELECT * FROM ( @@ -273,3 +379,4 @@ FULL OUTER JOIN ( OR test2.key between 100 and 102)) ) sq2 ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2); +SET hive.merge.nway.joins=true; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/mapjoin46.q ql/src/test/queries/clientpositive/mapjoin46.q index 348dd67..171a6a9 100644 --- ql/src/test/queries/clientpositive/mapjoin46.q +++ ql/src/test/queries/clientpositive/mapjoin46.q @@ -2,6 +2,8 @@ set hive.auto.convert.join=true; set hive.strict.checks.cartesian.product=false; set hive.join.emit.interval=2; +-- SORT_QUERY_RESULTS + CREATE TABLE test1 (key INT, value INT, col_1 STRING); INSERT INTO test1 VALUES (NULL, NULL, 'None'), (98, NULL, 'None'), (99, 0, 'Alice'), (99, 2, 'Mat'), (100, 1, 'Bob'), (101, 2, 'Car'); @@ -172,6 +174,22 @@ ON (test1.value=test2.value OR test2.key between 100 and 102)); -- Disjunction with pred on multiple inputs and single inputs (full outer join) +SET hive.mapjoin.full.outer=false; +EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102); + +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102); + +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; EXPLAIN SELECT * FROM test1 FULL OUTER JOIN test2 @@ -184,8 +202,23 @@ FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value OR test1.key between 100 and 102 OR test2.key between 100 and 102); +SET hive.merge.nway.joins=true; -- Disjunction with pred on multiple inputs and left input (full outer join) +SET hive.mapjoin.full.outer=false; +EXPLAIN 
+SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102); + +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102); + +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; EXPLAIN SELECT * FROM test1 FULL OUTER JOIN test2 @@ -196,8 +229,23 @@ SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value OR test1.key between 100 and 102); +SET hive.merge.nway.joins=true; -- Disjunction with pred on multiple inputs and right input (full outer join) +SET hive.mapjoin.full.outer=false; +EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102); + +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102); + +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; EXPLAIN SELECT * FROM test1 FULL OUTER JOIN test2 @@ -208,8 +256,25 @@ SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value OR test2.key between 100 and 102); +SET hive.merge.nway.joins=true; -- Keys plus residual (full outer join) +SET hive.mapjoin.full.outer=false; +EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)); + +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)); + +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; EXPLAIN SELECT * FROM test1 FULL OUTER JOIN test2 @@ -222,8 +287,51 @@ FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value AND (test1.key between 100 and 102 OR test2.key between 100 and 102)); +SET hive.merge.nway.joins=true; -- Mixed ( FOJ (ROJ, LOJ) ) +SET hive.mapjoin.full.outer=false; +EXPLAIN +SELECT * +FROM ( + SELECT test1.key AS key1, test1.value AS value1, test1.col_1 AS col_1, + test2.key AS key2, test2.value AS value2, test2.col_2 AS col_2 + FROM test1 RIGHT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq1 +FULL OUTER JOIN ( + SELECT test1.key AS key3, test1.value AS value3, test1.col_1 AS col_3, + test2.key AS key4, test2.value AS value4, test2.col_2 AS col_4 + FROM test1 LEFT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq2 +ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2); + +SELECT * +FROM ( + SELECT test1.key AS key1, test1.value AS value1, test1.col_1 AS col_1, + test2.key AS key2, test2.value AS value2, test2.col_2 AS col_2 + FROM test1 RIGHT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq1 +FULL OUTER JOIN ( + SELECT test1.key AS key3, test1.value AS value3, test1.col_1 AS col_3, + test2.key AS key4, test2.value AS value4, test2.col_2 AS col_4 + FROM test1 LEFT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq2 +ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2); + +SET hive.mapjoin.full.outer=true; +SET hive.merge.nway.joins=false; EXPLAIN SELECT * FROM ( @@ -262,3 +370,4 @@ FULL OUTER JOIN ( OR test2.key between 100 and 102)) ) sq2 ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2); +SET hive.merge.nway.joins=true; \ No 
newline at end of file diff --git ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_3.q ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_3.q index ff4cde2..ee9a89c 100644 --- ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_3.q +++ ql/src/test/queries/clientpositive/tez_dynpart_hashjoin_3.q @@ -22,3 +22,9 @@ select a.* from alltypesorc a left outer join src b on a.cint = cast(b.key as int) and (a.cint < 100) limit 1; + +explain +select a.* +from alltypesorc a left outer join src b +on a.cint = cast(b.key as int) +limit 1; diff --git ql/src/test/queries/clientpositive/vector_full_outer_join.q ql/src/test/queries/clientpositive/vector_full_outer_join.q new file mode 100644 index 0000000..cc77488 --- /dev/null +++ ql/src/test/queries/clientpositive/vector_full_outer_join.q @@ -0,0 +1,82 @@ +set hive.cli.print.header=true; +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.fetch.task.conversion=none; +set hive.auto.convert.join=true; +set hive.auto.convert.join.noconditionaltask=true; +set hive.auto.convert.join.noconditionaltask.size=10000; + +-- SORT_QUERY_RESULTS + +drop table if exists TJOIN1; +drop table if exists TJOIN2; +create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) STORED AS orc; +create table if not exists TJOIN2 (RNUM int , C1 int, C2 char(2)) STORED AS orc; +create table if not exists TJOIN1STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE ; +create table if not exists TJOIN2STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE ; +LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE INTO TABLE TJOIN1STAGE; +LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' OVERWRITE INTO TABLE TJOIN2STAGE; +INSERT INTO TABLE TJOIN1 SELECT * from TJOIN1STAGE; +INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE; + +SET hive.mapjoin.full.outer=true; + +set hive.vectorized.execution.enabled=false; +set hive.mapjoin.hybridgrace.hashtable=false; +explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +set hive.vectorized.execution.enabled=false; +set hive.mapjoin.hybridgrace.hashtable=true; +explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + + +set hive.vectorized.execution.enabled=true; +set hive.mapjoin.hybridgrace.hashtable=false; +SET hive.vectorized.execution.mapjoin.native.enabled=false; +explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +set hive.vectorized.execution.enabled=true; +set hive.mapjoin.hybridgrace.hashtable=true; +SET hive.vectorized.execution.mapjoin.native.enabled=false; +explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, 
tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +set hive.vectorized.execution.enabled=true; +set hive.mapjoin.hybridgrace.hashtable=false; +SET hive.vectorized.execution.mapjoin.native.enabled=true; +explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +set hive.vectorized.execution.enabled=true; +set hive.mapjoin.hybridgrace.hashtable=true; +SET hive.vectorized.execution.mapjoin.native.enabled=true; +explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + + +-- Omit tjoin2.c1 +explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +-- Omit tjoin2.c1 and tjoin2.c2 +explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); + +select tjoin1.rnum, tjoin1.c1, tjoin1.c2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ); diff --git ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_fast.q ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_fast.q new file mode 100644 index 0000000..e5218fd --- /dev/null +++ ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_fast.q @@ -0,0 +1,444 @@ +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.vectorized.execution.enabled=true; +set hive.mapjoin.hybridgrace.hashtable=false; +set hive.vectorized.execution.mapjoin.native.enabled=true; +set hive.vectorized.execution.mapjoin.native.fast.hashtable.enabled=true; + +set hive.auto.convert.join=true; +SET hive.auto.convert.join.noconditionaltask=true; +set hive.stats.fetch.column.stats=false; + +------------------------------------------------------------------------------------------ +-- FULL OUTER Vectorized Native MapJoin variation for FAST hash table implementation. 
+------------------------------------------------------------------------------------------ + +-- SORT_QUERY_RESULTS + +------------------------------------------------------------------------------------------ +-- SHARED-MEMORY MAPJOIN +------------------------------------------------------------------------------------------ +set hive.optimize.dynamic.partition.hashjoin=false; +SET hive.auto.convert.join.noconditionaltask.size=1000000000; + +------------------------------------------------------------------------------------------ +-- Single LONG key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt; +CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt; + +CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt; +CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt; + +CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt; +CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt; + +CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt; +CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_nonull_txt; + +analyze table fullouter_long_big_1a compute statistics; +analyze table fullouter_long_big_1a compute statistics for columns; +analyze table fullouter_long_big_1a_nonull compute statistics; +analyze table fullouter_long_big_1a_nonull compute statistics for columns; +analyze table fullouter_long_small_1a compute statistics; +analyze table fullouter_long_small_1a compute statistics for columns; +analyze table fullouter_long_small_1a_nonull compute statistics; +analyze table fullouter_long_small_1a_nonull compute statistics for columns; + +-- Do first one with FULL OUTER MapJoin NOT Enabled. +SET hive.mapjoin.full.outer=false; +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SET hive.mapjoin.full.outer=true; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). 
+SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b; + +CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b; + +analyze table fullouter_long_big_1b compute statistics; +analyze table fullouter_long_big_1b compute statistics for columns; +analyze table fullouter_long_small_1b compute statistics; +analyze table fullouter_long_small_1b compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1c(key int, b_string string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c; + +CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18)) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c; + +analyze table fullouter_long_big_1c compute statistics; +analyze table fullouter_long_big_1c compute statistics for columns; +analyze table fullouter_long_small_1c compute statistics; +analyze table fullouter_long_small_1c compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1d(key int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d; + +CREATE TABLE fullouter_long_small_1d(key int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d; + +analyze table fullouter_long_big_1d compute statistics; +analyze table fullouter_long_big_1d compute statistics for columns; +analyze table fullouter_long_small_1d compute statistics; +analyze table fullouter_long_small_1d compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- MULTI-KEY key 
+------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt; +CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt; + +CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt; +CREATE TABLE fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt; + +CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt; +CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt; + +CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt; +CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt; + +analyze table fullouter_multikey_big_1a compute statistics; +analyze table fullouter_multikey_big_1a compute statistics for columns; +analyze table fullouter_multikey_big_1a_nonull compute statistics; +analyze table fullouter_multikey_big_1a_nonull compute statistics for columns; +analyze table fullouter_multikey_small_1a compute statistics; +analyze table fullouter_multikey_small_1a compute statistics for columns; +analyze table fullouter_multikey_small_1a_nonull compute statistics; +analyze table fullouter_multikey_small_1a_nonull compute statistics for columns; + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Big table without NULL key(s). +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Small table without NULL key(s). +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Both Big and Small tables without NULL key(s). 
+SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1
+order by b.key0, b.key1;
+
+
+
+
+CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string)
+row format delimited fields terminated by ',';
+LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt;
+CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt;
+
+CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18))
+row format delimited fields terminated by ',';
+LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt;
+CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt;
+
+analyze table fullouter_multikey_big_1b compute statistics;
+analyze table fullouter_multikey_big_1b compute statistics for columns;
+analyze table fullouter_multikey_small_1b compute statistics;
+analyze table fullouter_multikey_small_1b compute statistics for columns;
+
+EXPLAIN VECTORIZATION DETAIL
+SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2
+order by b.key0, b.key1;
+
+SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2
+order by b.key0, b.key1;
+
+
+------------------------------------------------------------------------------------------
+-- Single STRING key
+------------------------------------------------------------------------------------------
+
+CREATE TABLE fullouter_string_big_1a_txt(key string)
+row format delimited fields terminated by ',';
+LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt;
+CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt;
+
+CREATE TABLE fullouter_string_big_1a_nonull_txt(key string)
+row format delimited fields terminated by ',';
+LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_nonull_txt;
+CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt;
+
+CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, s_timestamp timestamp)
+row format delimited fields terminated by ',';
+LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt;
+CREATE TABLE fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt;
+
+CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, s_date date, s_timestamp timestamp)
+row format delimited fields terminated by ',';
+LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt;
+CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt;
+
+analyze table fullouter_string_big_1a compute statistics;
+analyze table fullouter_string_big_1a compute statistics for columns;
+analyze table fullouter_string_big_1a_nonull compute statistics; +analyze table fullouter_string_big_1a_nonull compute statistics for columns; +analyze table fullouter_string_small_1a compute statistics; +analyze table fullouter_string_small_1a compute statistics for columns; +analyze table fullouter_string_small_1a_nonull compute statistics; +analyze table fullouter_string_small_1a_nonull compute statistics for columns; + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + + + +------------------------------------------------------------------------------------------ +-- DYNAMIC PARTITION HASH JOIN +------------------------------------------------------------------------------------------ + +set hive.optimize.dynamic.partition.hashjoin=true; + +-- NOTE: Use very small sizes here to skip SHARED MEMORY MapJoin and force usage +-- NOTE: of DYNAMIC PARTITION HASH JOIN instead. +set hive.auto.convert.join.noconditionaltask.size=500; +set hive.exec.reducers.bytes.per.reducer=500; + +------------------------------------------------------------------------------------------ +-- Single LONG key +------------------------------------------------------------------------------------------ + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). 
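+-- (These are the same queries as the SHARED-MEMORY MAPJOIN runs above; only the
+-- join algorithm changes, so each result set should match its earlier run.)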
+EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + + + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + + + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + + + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- Single STRING key +------------------------------------------------------------------------------------------ + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). 
+EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- MULTI-KEY key +------------------------------------------------------------------------------------------ + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Big table without NULL key(s). +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Small table without NULL key(s). +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Both Big and Small tables without NULL key(s). 
+EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + + + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + + + diff --git ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_optimized.q ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_optimized.q new file mode 100644 index 0000000..2fbfcbb --- /dev/null +++ ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_optimized.q @@ -0,0 +1,444 @@ +set hive.mapred.mode=nonstrict; +set hive.explain.user=false; +set hive.vectorized.execution.enabled=true; +set hive.mapjoin.hybridgrace.hashtable=false; +set hive.vectorized.execution.mapjoin.native.enabled=true; +set hive.vectorized.execution.mapjoin.native.fast.hashtable.enabled=false; + +set hive.auto.convert.join=true; +SET hive.auto.convert.join.noconditionaltask=true; +set hive.stats.fetch.column.stats=false; + +------------------------------------------------------------------------------------------ +-- FULL OUTER Vectorized Native MapJoin variation for OPTIMIZED hash table implementation. 
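+-- (With hive.vectorized.execution.mapjoin.native.enabled=true and the FAST hash
+-- table disabled above, the native vector MapJoin should use the OPTIMIZED hash
+-- table implementation.)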
+------------------------------------------------------------------------------------------ + +-- SORT_QUERY_RESULTS + +------------------------------------------------------------------------------------------ +-- SHARED-MEMORY MAPJOIN +------------------------------------------------------------------------------------------ +set hive.optimize.dynamic.partition.hashjoin=false; +SET hive.auto.convert.join.noconditionaltask.size=1000000000; + +------------------------------------------------------------------------------------------ +-- Single LONG key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt; +CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt; + +CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt; +CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt; + +CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt; +CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt; + +CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt; +CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_nonull_txt; + +analyze table fullouter_long_big_1a compute statistics; +analyze table fullouter_long_big_1a compute statistics for columns; +analyze table fullouter_long_big_1a_nonull compute statistics; +analyze table fullouter_long_big_1a_nonull compute statistics for columns; +analyze table fullouter_long_small_1a compute statistics; +analyze table fullouter_long_small_1a compute statistics for columns; +analyze table fullouter_long_small_1a_nonull compute statistics; +analyze table fullouter_long_small_1a_nonull compute statistics for columns; + +-- Do first one with FULL OUTER MapJoin NOT Enabled. +SET hive.mapjoin.full.outer=false; +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SET hive.mapjoin.full.outer=true; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). 
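+-- (FULL OUTER also emits small-table rows that never matched; a NULL small-table
+-- key is a guaranteed non-match, so this variation leaves only value-based
+-- non-matches on the small side.)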
+SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b; + +CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b; + +analyze table fullouter_long_big_1b compute statistics; +analyze table fullouter_long_big_1b compute statistics for columns; +analyze table fullouter_long_small_1b compute statistics; +analyze table fullouter_long_small_1b compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1c(key int, b_string string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c; + +CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18)) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c; + +analyze table fullouter_long_big_1c compute statistics; +analyze table fullouter_long_big_1c compute statistics for columns; +analyze table fullouter_long_small_1c compute statistics; +analyze table fullouter_long_small_1c compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1d(key int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d; + +CREATE TABLE fullouter_long_small_1d(key int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d; + +analyze table fullouter_long_big_1d compute statistics; +analyze table fullouter_long_big_1d compute statistics for columns; +analyze table fullouter_long_small_1d compute statistics; +analyze table fullouter_long_small_1d compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- MULTI-KEY key 
+------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt; +CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt; + +CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt; +CREATE TABLE fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt; + +CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt; +CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt; + +CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt; +CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt; + +analyze table fullouter_multikey_big_1a compute statistics; +analyze table fullouter_multikey_big_1a compute statistics for columns; +analyze table fullouter_multikey_big_1a_nonull compute statistics; +analyze table fullouter_multikey_big_1a_nonull compute statistics for columns; +analyze table fullouter_multikey_small_1a compute statistics; +analyze table fullouter_multikey_small_1a compute statistics for columns; +analyze table fullouter_multikey_small_1a_nonull compute statistics; +analyze table fullouter_multikey_small_1a_nonull compute statistics for columns; + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Big table without NULL key(s). +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Small table without NULL key(s). +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Both Big and Small tables without NULL key(s). 
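+-- (Hypothetical sketch of the expected FULL OUTER shape: big keys {(1,10),(2,20)}
+-- joined with small keys {(2,20),(3,30)} would yield (2,20,2,20) plus
+-- (1,10,NULL,NULL) and (NULL,NULL,3,30).)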
+SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1
+order by b.key0, b.key1;
+
+
+
+
+CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string)
+row format delimited fields terminated by ',';
+LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt;
+CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt;
+
+CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18))
+row format delimited fields terminated by ',';
+LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt;
+CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt;
+
+analyze table fullouter_multikey_big_1b compute statistics;
+analyze table fullouter_multikey_big_1b compute statistics for columns;
+analyze table fullouter_multikey_small_1b compute statistics;
+analyze table fullouter_multikey_small_1b compute statistics for columns;
+
+EXPLAIN VECTORIZATION DETAIL
+SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2
+order by b.key0, b.key1;
+
+SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2
+order by b.key0, b.key1;
+
+
+------------------------------------------------------------------------------------------
+-- Single STRING key
+------------------------------------------------------------------------------------------
+
+CREATE TABLE fullouter_string_big_1a_txt(key string)
+row format delimited fields terminated by ',';
+LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt;
+CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt;
+
+CREATE TABLE fullouter_string_big_1a_nonull_txt(key string)
+row format delimited fields terminated by ',';
+LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_nonull_txt;
+CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt;
+
+CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, s_timestamp timestamp)
+row format delimited fields terminated by ',';
+LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt;
+CREATE TABLE fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt;
+
+CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, s_date date, s_timestamp timestamp)
+row format delimited fields terminated by ',';
+LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt;
+CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt;
+
+analyze table fullouter_string_big_1a compute statistics;
+analyze table fullouter_string_big_1a compute statistics for columns;
+analyze table fullouter_string_big_1a_nonull compute statistics; +analyze table fullouter_string_big_1a_nonull compute statistics for columns; +analyze table fullouter_string_small_1a compute statistics; +analyze table fullouter_string_small_1a compute statistics for columns; +analyze table fullouter_string_small_1a_nonull compute statistics; +analyze table fullouter_string_small_1a_nonull compute statistics for columns; + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + + + +------------------------------------------------------------------------------------------ +-- DYNAMIC PARTITION HASH JOIN +------------------------------------------------------------------------------------------ + +set hive.optimize.dynamic.partition.hashjoin=true; + +-- NOTE: Use very small sizes here to skip SHARED MEMORY MapJoin and force usage +-- NOTE: of DYNAMIC PARTITION HASH JOIN instead. +set hive.auto.convert.join.noconditionaltask.size=500; +set hive.exec.reducers.bytes.per.reducer=500; + +------------------------------------------------------------------------------------------ +-- Single LONG key +------------------------------------------------------------------------------------------ + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). 
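+-- (hive.exec.reducers.bytes.per.reducer is also tiny above, so the shuffled rows
+-- should fan out across several reducers, each building its own partition of the
+-- small-table hash table.)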
+EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + + + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + + + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + + + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- Single STRING key +------------------------------------------------------------------------------------------ + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). 
+EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- MULTI-KEY key +------------------------------------------------------------------------------------------ + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Big table without NULL key(s). +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Small table without NULL key(s). +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Both Big and Small tables without NULL key(s). 
+EXPLAIN VECTORIZATION DETAIL
+SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1
+order by b.key0, b.key1;
+
+SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1
+order by b.key0, b.key1;
+
+
+
+
+EXPLAIN VECTORIZATION DETAIL
+SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2
+order by b.key0, b.key1;
+
+SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2
+order by b.key0, b.key1;
+
+
+
diff --git ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_optimized_passthru.q ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_optimized_passthru.q
new file mode 100644
index 0000000..fdea4bc
--- /dev/null
+++ ql/src/test/queries/clientpositive/vector_fullouter_mapjoin_1_optimized_passthru.q
@@ -0,0 +1,444 @@
+set hive.mapred.mode=nonstrict;
+set hive.explain.user=false;
+set hive.vectorized.execution.enabled=true;
+set hive.mapjoin.hybridgrace.hashtable=false;
+set hive.vectorized.execution.mapjoin.native.enabled=false;
+set hive.vectorized.execution.mapjoin.native.fast.hashtable.enabled=false;
+
+set hive.auto.convert.join=true;
+SET hive.auto.convert.join.noconditionaltask=true;
+set hive.stats.fetch.column.stats=false;
+
+------------------------------------------------------------------------------------------
+-- FULL OUTER Vectorized PASS-THRU Mode MapJoin variation for OPTIMIZED hash table implementation.
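+-- (hive.vectorized.execution.mapjoin.native.enabled=false above, so the vectorized
+-- pipeline should pass rows through the row-mode MapJoin operator instead of a
+-- native vector MapJoin.)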
+------------------------------------------------------------------------------------------ + +-- SORT_QUERY_RESULTS + +------------------------------------------------------------------------------------------ +-- SHARED-MEMORY MAPJOIN +------------------------------------------------------------------------------------------ +set hive.optimize.dynamic.partition.hashjoin=false; +SET hive.auto.convert.join.noconditionaltask.size=1000000000; + +------------------------------------------------------------------------------------------ +-- Single LONG key +------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt; +CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt; + +CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt; +CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt; + +CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt; +CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt; + +CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt; +CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_nonull_txt; + +analyze table fullouter_long_big_1a compute statistics; +analyze table fullouter_long_big_1a compute statistics for columns; +analyze table fullouter_long_big_1a_nonull compute statistics; +analyze table fullouter_long_big_1a_nonull compute statistics for columns; +analyze table fullouter_long_small_1a compute statistics; +analyze table fullouter_long_small_1a compute statistics for columns; +analyze table fullouter_long_small_1a_nonull compute statistics; +analyze table fullouter_long_small_1a_nonull compute statistics for columns; + +-- Do first one with FULL OUTER MapJoin NOT Enabled. +SET hive.mapjoin.full.outer=false; +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SET hive.mapjoin.full.outer=true; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). 
+SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b; + +CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b; + +analyze table fullouter_long_big_1b compute statistics; +analyze table fullouter_long_big_1b compute statistics for columns; +analyze table fullouter_long_small_1b compute statistics; +analyze table fullouter_long_small_1b compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1c(key int, b_string string) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c; + +CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18)) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c; + +analyze table fullouter_long_big_1c compute statistics; +analyze table fullouter_long_big_1c compute statistics for columns; +analyze table fullouter_long_small_1c compute statistics; +analyze table fullouter_long_small_1c compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + + +CREATE TABLE fullouter_long_big_1d(key int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d; + +CREATE TABLE fullouter_long_small_1d(key int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d; + +analyze table fullouter_long_big_1d compute statistics; +analyze table fullouter_long_big_1d compute statistics for columns; +analyze table fullouter_long_small_1d compute statistics; +analyze table fullouter_long_small_1d compute statistics for columns; + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- MULTI-KEY key 
+------------------------------------------------------------------------------------------ + +CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt; +CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt; + +CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt; +CREATE TABLE fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt; + +CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt; +CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt; + +CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ','; +LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt; +CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt; + +analyze table fullouter_multikey_big_1a compute statistics; +analyze table fullouter_multikey_big_1a compute statistics for columns; +analyze table fullouter_multikey_big_1a_nonull compute statistics; +analyze table fullouter_multikey_big_1a_nonull compute statistics for columns; +analyze table fullouter_multikey_small_1a compute statistics; +analyze table fullouter_multikey_small_1a compute statistics for columns; +analyze table fullouter_multikey_small_1a_nonull compute statistics; +analyze table fullouter_multikey_small_1a_nonull compute statistics for columns; + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Big table without NULL key(s). +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Small table without NULL key(s). +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Both Big and Small tables without NULL key(s). 
+SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1
+order by b.key0, b.key1;
+
+
+
+
+CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string)
+row format delimited fields terminated by ',';
+LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt;
+CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt;
+
+CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18))
+row format delimited fields terminated by ',';
+LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt;
+CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt;
+
+analyze table fullouter_multikey_big_1b compute statistics;
+analyze table fullouter_multikey_big_1b compute statistics for columns;
+analyze table fullouter_multikey_small_1b compute statistics;
+analyze table fullouter_multikey_small_1b compute statistics for columns;
+
+EXPLAIN VECTORIZATION DETAIL
+SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2
+order by b.key0, b.key1;
+
+SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2
+order by b.key0, b.key1;
+
+
+------------------------------------------------------------------------------------------
+-- Single STRING key
+------------------------------------------------------------------------------------------
+
+CREATE TABLE fullouter_string_big_1a_txt(key string)
+row format delimited fields terminated by ',';
+LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt;
+CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt;
+
+CREATE TABLE fullouter_string_big_1a_nonull_txt(key string)
+row format delimited fields terminated by ',';
+LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_nonull_txt;
+CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt;
+
+CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, s_timestamp timestamp)
+row format delimited fields terminated by ',';
+LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt;
+CREATE TABLE fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt;
+
+CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, s_date date, s_timestamp timestamp)
+row format delimited fields terminated by ',';
+LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt;
+CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt;
+
+analyze table fullouter_string_big_1a compute statistics;
+analyze table fullouter_string_big_1a compute statistics for columns;
+analyze table fullouter_string_big_1a_nonull compute statistics; +analyze table fullouter_string_big_1a_nonull compute statistics for columns; +analyze table fullouter_string_small_1a compute statistics; +analyze table fullouter_string_small_1a compute statistics for columns; +analyze table fullouter_string_small_1a_nonull compute statistics; +analyze table fullouter_string_small_1a_nonull compute statistics for columns; + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + + + +------------------------------------------------------------------------------------------ +-- DYNAMIC PARTITION HASH JOIN +------------------------------------------------------------------------------------------ + +set hive.optimize.dynamic.partition.hashjoin=true; + +-- NOTE: Use very small sizes here to skip SHARED MEMORY MapJoin and force usage +-- NOTE: of DYNAMIC PARTITION HASH JOIN instead. +set hive.auto.convert.join.noconditionaltask.size=500; +set hive.exec.reducers.bytes.per.reducer=500; + +------------------------------------------------------------------------------------------ +-- Single LONG key +------------------------------------------------------------------------------------------ + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). 
+EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key; + + + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key; + + + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key; + + + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- Single STRING key +------------------------------------------------------------------------------------------ + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Big table without NULL key(s). +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key; + +-- Small table without NULL key(s). +EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +-- Both Big and Small tables without NULL key(s). 
+EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key; + + +------------------------------------------------------------------------------------------ +-- MULTI-KEY key +------------------------------------------------------------------------------------------ + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Big table without NULL key(s). +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Small table without NULL key(s). +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +-- Both Big and Small tables without NULL key(s). 
+EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1; + + + + +EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1; + + + diff --git ql/src/test/queries/clientpositive/vector_join30.q ql/src/test/queries/clientpositive/vector_join30.q index 6557a71..9108c89 100644 --- ql/src/test/queries/clientpositive/vector_join30.q +++ ql/src/test/queries/clientpositive/vector_join30.q @@ -10,7 +10,7 @@ SET hive.auto.convert.join.noconditionaltask.size=1000000000; CREATE TABLE orcsrc STORED AS ORC AS SELECT * FROM src; -explain vectorization expression +explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x JOIN @@ -18,14 +18,14 @@ JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)); -FROM -(SELECT orcsrc.* FROM orcsrc sort by key) x -JOIN -(SELECT orcsrc.* FROM orcsrc sort by value) Y -ON (x.key = Y.key) -select sum(hash(Y.key,Y.value)); +-- FROM +-- (SELECT orcsrc.* FROM orcsrc sort by key) x +-- JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Y +-- ON (x.key = Y.key) +-- select sum(hash(Y.key,Y.value)); -explain vectorization expression +explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x LEFT OUTER JOIN @@ -33,116 +33,238 @@ LEFT OUTER JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)); +-- FROM +-- (SELECT orcsrc.* FROM orcsrc sort by key) x +-- LEFT OUTER JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Y +-- ON (x.key = Y.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x -LEFT OUTER JOIN +RIGHT OUTER JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Y ON (x.key = Y.key) select sum(hash(Y.key,Y.value)); -explain vectorization expression +-- FROM +-- (SELECT orcsrc.* FROM orcsrc sort by key) x +-- RIGHT OUTER JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Y +-- ON (x.key = Y.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x -RIGHT OUTER JOIN +FULL OUTER JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Y ON (x.key = Y.key) select sum(hash(Y.key,Y.value)); +-- FROM +-- (SELECT orcsrc.* FROM orcsrc sort by key) x +-- FULL OUTER JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Y +-- ON (x.key = Y.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x -RIGHT OUTER JOIN +JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Y ON (x.key = Y.key) +JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) select sum(hash(Y.key,Y.value)); -explain vectorization expression +-- FROM +-- (SELECT orcsrc.* FROM orcsrc sort by key) x +-- JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Y 
+-- ON (x.key = Y.key) +-- JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Z +-- ON (x.key = Z.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Y ON (x.key = Y.key) -JOIN +LEFT OUTER JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Z ON (x.key = Z.key) select sum(hash(Y.key,Y.value)); -FROM +-- FROM +-- (SELECT orcsrc.* FROM orcsrc sort by key) x +-- JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Y +-- ON (x.key = Y.key) +-- LEFT OUTER JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Z +-- ON (x.key = Z.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail debug +FROM (SELECT orcsrc.* FROM orcsrc sort by key) x -JOIN +LEFT OUTER JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Y ON (x.key = Y.key) -JOIN +LEFT OUTER JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Z ON (x.key = Z.key) select sum(hash(Y.key,Y.value)); -explain vectorization expression +-- FROM +-- (SELECT orcsrc.* FROM orcsrc sort by key) x +-- LEFT OUTER JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Y +-- ON (x.key = Y.key) +-- LEFT OUTER JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Z +-- ON (x.key = Z.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x -JOIN +LEFT OUTER JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Y ON (x.key = Y.key) -LEFT OUTER JOIN +RIGHT OUTER JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Z ON (x.key = Z.key) select sum(hash(Y.key,Y.value)); -FROM +-- FROM +-- (SELECT orcsrc.* FROM orcsrc sort by key) x +-- LEFT OUTER JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Y +-- ON (x.key = Y.key) +-- RIGHT OUTER JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Z +-- ON (x.key = Z.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail debug +FROM (SELECT orcsrc.* FROM orcsrc sort by key) x -JOIN +RIGHT OUTER JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Y ON (x.key = Y.key) -LEFT OUTER JOIN +RIGHT OUTER JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Z ON (x.key = Z.key) select sum(hash(Y.key,Y.value)); -explain vectorization expression +-- FROM +-- (SELECT orcsrc.* FROM orcsrc sort by key) x +-- RIGHT OUTER JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Y +-- ON (x.key = Y.key) +-- RIGHT OUTER JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Z +-- ON (x.key = Z.key) +-- select sum(hash(Y.key,Y.value)); + +----------------- + +explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x -LEFT OUTER JOIN +JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Y ON (x.key = Y.key) -LEFT OUTER JOIN +FULL OUTER JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Z ON (x.key = Z.key) select sum(hash(Y.key,Y.value)); -FROM +-- FROM +-- (SELECT orcsrc.* FROM orcsrc sort by key) x +-- JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Y +-- ON (x.key = Y.key) +-- FULL OUTER JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Z +-- ON (x.key = Z.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail debug +FROM (SELECT orcsrc.* FROM orcsrc sort by key) x -LEFT OUTER JOIN +FULL OUTER JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Y ON (x.key = Y.key) -LEFT OUTER JOIN +FULL OUTER JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Z ON (x.key = Z.key) select sum(hash(Y.key,Y.value)); -explain vectorization expression +-- FROM +-- (SELECT orcsrc.* FROM orcsrc sort by key) x +-- FULL OUTER 
JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Y +-- ON (x.key = Y.key) +-- FULL OUTER JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Z +-- ON (x.key = Z.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x -LEFT OUTER JOIN +FULL OUTER JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Y ON (x.key = Y.key) -RIGHT OUTER JOIN +LEFT OUTER JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Z ON (x.key = Z.key) select sum(hash(Y.key,Y.value)); -FROM +-- FROM +-- (SELECT orcsrc.* FROM orcsrc sort by key) x +-- FULL OUTER JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Y +-- ON (x.key = Y.key) +-- LEFT OUTER JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Z +-- ON (x.key = Z.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail debug +FROM (SELECT orcsrc.* FROM orcsrc sort by key) x LEFT OUTER JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Y ON (x.key = Y.key) -RIGHT OUTER JOIN +FULL OUTER JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Z ON (x.key = Z.key) select sum(hash(Y.key,Y.value)); -explain vectorization expression +-- FROM +-- (SELECT orcsrc.* FROM orcsrc sort by key) x +-- LEFT OUTER JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Y +-- ON (x.key = Y.key) +-- FULL OUTER JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Z +-- ON (x.key = Z.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x -RIGHT OUTER JOIN +FULL OUTER JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Y ON (x.key = Y.key) RIGHT OUTER JOIN @@ -150,12 +272,33 @@ RIGHT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)); -FROM +-- FROM +-- (SELECT orcsrc.* FROM orcsrc sort by key) x +-- FULL OUTER JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Y +-- ON (x.key = Y.key) +-- RIGHT OUTER JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Z +-- ON (x.key = Z.key) +-- select sum(hash(Y.key,Y.value)); + +explain vectorization detail debug +FROM (SELECT orcsrc.* FROM orcsrc sort by key) x RIGHT OUTER JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Y ON (x.key = Y.key) -RIGHT OUTER JOIN +FULL OUTER JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Z ON (x.key = Z.key) select sum(hash(Y.key,Y.value)); + +-- FROM +-- (SELECT orcsrc.* FROM orcsrc sort by key) x +-- RIGHT OUTER JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Y +-- ON (x.key = Y.key) +-- FULL OUTER JOIN +-- (SELECT orcsrc.* FROM orcsrc sort by value) Z +-- ON (x.key = Z.key) +-- select sum(hash(Y.key,Y.value)); diff --git ql/src/test/queries/clientpositive/vector_join_filters.q ql/src/test/queries/clientpositive/vector_join_filters.q index aac10c1..bb11ae9 100644 --- ql/src/test/queries/clientpositive/vector_join_filters.q +++ ql/src/test/queries/clientpositive/vector_join_filters.q @@ -14,7 +14,10 @@ CREATE TABLE myinput1 STORED AS ORC AS SELECT * FROM myinput1_txt; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; + +EXPLAIN VECTORIZATION DETAIL DEBUG SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND 
b.key = b.value; +-- SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; @@ -26,10 +29,21 @@ SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN my SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key and a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +EXPLAIN VECTORIZATION DETAIL DEBUG SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +-- SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; + +EXPLAIN VECTORIZATION DETAIL DEBUG SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +-- SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; + +EXPLAIN VECTORIZATION DETAIL DEBUG SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +-- SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; + +EXPLAIN VECTORIZATION DETAIL DEBUG SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; +-- SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value; SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value); SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a RIGHT OUTER JOIN myinput1 b ON (a.value=b.value 
AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) LEFT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value);
diff --git ql/src/test/queries/clientpositive/vector_join_nulls.q ql/src/test/queries/clientpositive/vector_join_nulls.q
index b978b41..694f931 100644
--- ql/src/test/queries/clientpositive/vector_join_nulls.q
+++ ql/src/test/queries/clientpositive/vector_join_nulls.q
@@ -14,7 +14,11 @@ CREATE TABLE myinput1 STORED AS ORC AS SELECT * FROM myinput1_txt;
 SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b;
 SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b;
+
+EXPLAIN VECTORIZATION DETAIL DEBUG
 SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b;
+-- SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b;
+
 SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value;
 SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.key;
 SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.value = b.value;
@@ -23,9 +27,21 @@ SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN my
 SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.value = b.value;
 SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key;
 SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key = b.key and a.value=b.value;
+
+EXPLAIN VECTORIZATION DETAIL DEBUG
 SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value;
+-- SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value;
+
+EXPLAIN VECTORIZATION DETAIL DEBUG
 SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key;
+-- SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key;
+
+EXPLAIN VECTORIZATION DETAIL DEBUG
 SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value;
+-- SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value;
+
+EXPLAIN VECTORIZATION DETAIL DEBUG
 SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value;
+-- SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value;
 SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value);
diff --git ql/src/test/queries/clientpositive/vector_left_outer_join2.q ql/src/test/queries/clientpositive/vector_left_outer_join2.q
index ccceb36..84f656b 100644
--- ql/src/test/queries/clientpositive/vector_left_outer_join2.q
+++ ql/src/test/queries/clientpositive/vector_left_outer_join2.q
@@ -1,3 +1,4 @@
+set hive.cli.print.header=true;
 set hive.mapred.mode=nonstrict;
 set hive.explain.user=false;
 set hive.fetch.task.conversion=none;
@@ -20,14 +21,14 @@ INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE;
 set hive.vectorized.execution.enabled=false;
 set
hive.mapjoin.hybridgrace.hashtable=false; -explain vectorization expression +explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); set hive.vectorized.execution.enabled=false; set hive.mapjoin.hybridgrace.hashtable=true; -explain vectorization expression +explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); @@ -36,7 +37,7 @@ select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left out set hive.vectorized.execution.enabled=true; set hive.mapjoin.hybridgrace.hashtable=false; SET hive.vectorized.execution.mapjoin.native.enabled=false; -explain vectorization expression +explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); @@ -44,7 +45,7 @@ select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left out set hive.vectorized.execution.enabled=true; set hive.mapjoin.hybridgrace.hashtable=true; SET hive.vectorized.execution.mapjoin.native.enabled=false; -explain vectorization expression +explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); @@ -52,7 +53,7 @@ select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left out set hive.vectorized.execution.enabled=true; set hive.mapjoin.hybridgrace.hashtable=false; SET hive.vectorized.execution.mapjoin.native.enabled=true; -explain vectorization expression +explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); @@ -60,7 +61,7 @@ select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left out set hive.vectorized.execution.enabled=true; set hive.mapjoin.hybridgrace.hashtable=true; SET hive.vectorized.execution.mapjoin.native.enabled=true; -explain vectorization expression +explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ); \ No newline at end of file diff --git ql/src/test/queries/clientpositive/vector_leftsemi_mapjoin.q ql/src/test/queries/clientpositive/vector_leftsemi_mapjoin.q index 8469a06..0abc983 100644 --- ql/src/test/queries/clientpositive/vector_leftsemi_mapjoin.q +++ 
ql/src/test/queries/clientpositive/vector_leftsemi_mapjoin.q @@ -3,6 +3,7 @@ set hive.fetch.task.conversion=none; set hive.auto.convert.join=true; set hive.auto.convert.join.noconditionaltask=true; set hive.auto.convert.join.noconditionaltask.size=10000; +SET hive.merge.nway.joins=false; -- SORT_QUERY_RESULTS @@ -83,6 +84,12 @@ explain vectorization only summary select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; +SET hive.mapjoin.full.outer=false; +explain vectorization only summary +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; + +SET hive.mapjoin.full.outer=true; explain vectorization only summary select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; @@ -91,10 +98,18 @@ explain vectorization only summary select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; +SET hive.merge.nway.joins=true; explain vectorization only summary select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; +SET hive.merge.nway.joins=false; + +SET hive.mapjoin.full.outer=false; +explain vectorization only summary +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; +SET hive.mapjoin.full.outer=true; explain vectorization only summary select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; @@ -166,6 +181,12 @@ explain vectorization summary select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; +SET hive.mapjoin.full.outer=false; +explain vectorization summary +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; + +SET hive.mapjoin.full.outer=true; explain vectorization summary select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; @@ -174,14 +195,22 @@ explain vectorization summary select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; +SET 
hive.merge.nway.joins=true; explain vectorization summary select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; +SET hive.merge.nway.joins=false; +SET hive.mapjoin.full.outer=false; explain vectorization summary select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; +SET hive.mapjoin.full.outer=true; +explain vectorization summary +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; +-- select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; + explain vectorization summary select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key; @@ -250,22 +279,36 @@ explain vectorization only operator select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; -explain vectorization only operator +SET hive.mapjoin.full.outer=false; +explain vectorization detail debug +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; + +SET hive.mapjoin.full.outer=true; +explain vectorization detail debug select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; +-- select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; explain vectorization only operator select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; +SET hive.merge.nway.joins=true; explain vectorization only operator select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; +SET hive.merge.nway.joins=false; +SET hive.mapjoin.full.outer=false; explain vectorization only operator select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; +SET hive.mapjoin.full.outer=true; +explain vectorization only operator +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; +-- select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; + explain vectorization only operator select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort 
by a.key; @@ -278,83 +321,97 @@ set hive.vectorized.execution.enabled=true; set hive.mapjoin.hybridgrace.hashtable=true; SET hive.vectorized.execution.mapjoin.native.enabled=false; -explain vectorization detail +explain vectorization detail debug select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value; -explain vectorization detail +explain vectorization detail debug select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value; select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value; -explain vectorization detail +explain vectorization detail debug select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value; select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value; -explain vectorization detail +explain vectorization detail debug select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value; select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value; -explain vectorization detail +explain vectorization detail debug select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; -explain vectorization detail +explain vectorization detail debug select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value; select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value; -explain vectorization detail +explain vectorization detail debug select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; -explain vectorization detail +explain vectorization detail debug select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value; select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value; -explain vectorization detail +explain vectorization detail debug select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key; select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key; -explain vectorization detail +explain vectorization detail debug select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value; -explain vectorization detail +explain vectorization detail debug select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value; select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value; -explain vectorization detail +explain vectorization detail debug select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value; select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value; -explain vectorization detail +explain vectorization detail debug select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by 
a.key; select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key; -explain vectorization detail +explain vectorization detail debug select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; -explain vectorization detail +SET hive.mapjoin.full.outer=false; +explain vectorization detail debug select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; -explain vectorization detail +SET hive.mapjoin.full.outer=true; +explain vectorization detail debug +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; + +explain vectorization detail debug select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; -explain vectorization detail +SET hive.merge.nway.joins=true; +explain vectorization detail debug select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; +SET hive.merge.nway.joins=false; + +SET hive.mapjoin.full.outer=false; +explain vectorization detail debug +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; -explain vectorization detail +SET hive.mapjoin.full.outer=true; +explain vectorization detail debug select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; -explain vectorization detail +explain vectorization detail debug select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key; -explain vectorization detail +explain vectorization detail debug select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100; select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100; @@ -362,83 +419,97 @@ set hive.vectorized.execution.enabled=true; set hive.mapjoin.hybridgrace.hashtable=false; SET hive.vectorized.execution.mapjoin.native.enabled=true; -explain vectorization detail +explain vectorization detail debug select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value; -explain vectorization detail +explain vectorization detail debug select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value; select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value; -explain vectorization detail +explain vectorization detail 
debug select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value; select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value; -explain vectorization detail +explain vectorization detail debug select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value; select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value; -explain vectorization detail +explain vectorization detail debug select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; -explain vectorization detail +explain vectorization detail debug select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value; select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value; -explain vectorization detail +explain vectorization detail debug select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; -explain vectorization detail +explain vectorization detail debug select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value; select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value; -explain vectorization detail +explain vectorization detail debug select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key; select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key; -explain vectorization detail +explain vectorization detail debug select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value; -explain vectorization detail +explain vectorization detail debug select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value; select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value; -explain vectorization detail +explain vectorization detail debug select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value; select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value; -explain vectorization detail +explain vectorization detail debug select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key; select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key; -explain vectorization detail +explain vectorization detail debug select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; -explain vectorization detail +SET hive.mapjoin.full.outer=false; +explain vectorization detail debug +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; +select a.key from t1 a full outer join t3 
b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; + +SET hive.mapjoin.full.outer=true; +explain vectorization detail debug select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; -explain vectorization detail +explain vectorization detail debug select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; -explain vectorization detail +SET hive.merge.nway.joins=true; +explain vectorization detail debug select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; +SET hive.merge.nway.joins=false; -explain vectorization detail +SET hive.mapjoin.full.outer=false; +explain vectorization detail debug select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; -explain vectorization detail +SET hive.mapjoin.full.outer=true; +explain vectorization detail debug +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; + +explain vectorization detail debug select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key; -explain vectorization detail +explain vectorization detail debug select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100; select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100; @@ -446,82 +517,96 @@ set hive.vectorized.execution.enabled=true; set hive.mapjoin.hybridgrace.hashtable=true; SET hive.vectorized.execution.mapjoin.native.enabled=true; -explain vectorization detail +explain vectorization detail debug select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value; -explain vectorization detail +explain vectorization detail debug select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value; select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value; -explain vectorization detail +explain vectorization detail debug select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value; select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value; -explain vectorization detail +explain vectorization detail debug select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value; select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value; -explain vectorization detail +explain vectorization detail debug select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value; 
-explain vectorization detail +explain vectorization detail debug select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value; select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value; -explain vectorization detail +explain vectorization detail debug select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value ; -explain vectorization detail +explain vectorization detail debug select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value; select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value; -explain vectorization detail +explain vectorization detail debug select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key; select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key; -explain vectorization detail +explain vectorization detail debug select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value; select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value; -explain vectorization detail +explain vectorization detail debug select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value; select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value; -explain vectorization detail +explain vectorization detail debug select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value; select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value; -explain vectorization detail +explain vectorization detail debug select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key; select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key; -explain vectorization detail +explain vectorization detail debug select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; -explain vectorization detail +SET hive.mapjoin.full.outer=false; +explain vectorization detail debug select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; -explain vectorization detail +SET hive.mapjoin.full.outer=true; +explain vectorization detail debug +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key; + +explain vectorization detail debug select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key; -explain vectorization detail 
+SET hive.merge.nway.joins=true; +explain vectorization detail debug select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key; +SET hive.merge.nway.joins=false; + +SET hive.mapjoin.full.outer=false; +explain vectorization detail debug +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; -explain vectorization detail +SET hive.mapjoin.full.outer=true; +explain vectorization detail debug select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key; -explain vectorization detail +explain vectorization detail debug select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key; select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key; -explain vectorization detail +explain vectorization detail debug select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100; select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100; diff --git ql/src/test/queries/clientpositive/vector_nullsafe_join.q ql/src/test/queries/clientpositive/vector_nullsafe_join.q index 6a7ff72..2d7155e 100644 --- ql/src/test/queries/clientpositive/vector_nullsafe_join.q +++ ql/src/test/queries/clientpositive/vector_nullsafe_join.q @@ -38,7 +38,11 @@ select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key -- outer joins SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key<=>b.value; + +EXPLAIN VECTORIZATION DETAIL DEBUG SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key<=>b.value; +-- SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key<=>b.value; + SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key<=>b.value; -- map joins @@ -65,7 +69,11 @@ select * from myinput1 a join myinput1 b on a.key<=>b.value AND a.value<=>b.key -- outer joins SELECT * FROM myinput1 a LEFT OUTER JOIN myinput1 b ON a.key<=>b.value; + +EXPLAIN VECTORIZATION DETAIL DEBUG SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key<=>b.value; +-- SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key<=>b.value; + SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key<=>b.value; -- map joins diff --git ql/src/test/queries/clientpositive/vectorized_join46.q ql/src/test/queries/clientpositive/vectorized_join46.q index af155cc..7be2b0e 100644 --- ql/src/test/queries/clientpositive/vectorized_join46.q +++ ql/src/test/queries/clientpositive/vectorized_join46.q @@ -1,3 +1,4 @@ +set hive.cli.print.header=true; set hive.vectorized.execution.enabled=true; set hive.auto.convert.join=true; set hive.strict.checks.cartesian.product=false; @@ -15,7 +16,7 @@ INSERT INTO test2 VALUES (102, 2, 'Del'), (103, 2, 'Ema'), -- Basic outer join -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value); @@ -25,7 +26,7 @@ FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value); -- Conjunction with pred on multiple inputs and single inputs (left outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * 
FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value @@ -39,7 +40,7 @@ ON (test1.value=test2.value AND test2.key between 100 and 102); -- Conjunction with pred on single inputs (left outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.key between 100 and 102 @@ -51,7 +52,7 @@ ON (test1.key between 100 and 102 AND test2.key between 100 and 102); -- Conjunction with pred on multiple inputs and none (left outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 RIGHT OUTER JOIN test2 ON (test1.value=test2.value AND true); @@ -61,7 +62,7 @@ FROM test1 RIGHT OUTER JOIN test2 ON (test1.value=test2.value AND true); -- Condition on one input (left outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.key between 100 and 102); @@ -71,7 +72,7 @@ FROM test1 LEFT OUTER JOIN test2 ON (test1.key between 100 and 102); -- Disjunction with pred on multiple inputs and single inputs (left outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value @@ -85,7 +86,7 @@ ON (test1.value=test2.value OR test2.key between 100 and 102); -- Disjunction with pred on multiple inputs and left input (left outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value @@ -97,7 +98,7 @@ ON (test1.value=test2.value OR test1.key between 100 and 102); -- Disjunction with pred on multiple inputs and right input (left outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value @@ -109,7 +110,7 @@ ON (test1.value=test2.value OR test2.key between 100 and 102); -- Keys plus residual (left outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value @@ -123,7 +124,7 @@ ON (test1.value=test2.value OR test2.key between 100 and 102)); -- Disjunction with pred on multiple inputs and single inputs (right outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 RIGHT OUTER JOIN test2 ON (test1.value=test2.value @@ -137,7 +138,7 @@ ON (test1.value=test2.value OR test2.key between 100 and 102); -- Disjunction with pred on multiple inputs and left input (right outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 RIGHT OUTER JOIN test2 ON (test1.value=test2.value @@ -149,7 +150,7 @@ ON (test1.value=test2.value OR test1.key between 100 and 102); -- Disjunction with pred on multiple inputs and right input (right outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 RIGHT OUTER JOIN test2 ON (test1.value=test2.value @@ -161,7 +162,7 @@ ON (test1.value=test2.value OR test2.key between 100 and 102); -- Keys plus residual (right outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 RIGHT OUTER JOIN test2 ON (test1.value=test2.value @@ -175,7 +176,7 @@ ON (test1.value=test2.value OR test2.key between 100 and 102)); -- Disjunction with pred on multiple inputs and single inputs (full outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value @@ -189,7 +190,7 @@ ON (test1.value=test2.value OR test2.key between 100 and 102); -- Disjunction with pred on multiple inputs and left input (full outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value @@ -201,7 +202,7 @@ ON (test1.value=test2.value OR test1.key 
between 100 and 102); -- Disjunction with pred on multiple inputs and right input (full outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value @@ -213,7 +214,7 @@ ON (test1.value=test2.value OR test2.key between 100 and 102); -- Keys plus residual (full outer join) -EXPLAIN +EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value diff --git ql/src/test/queries/clientpositive/vectorized_join46_mr.q ql/src/test/queries/clientpositive/vectorized_join46_mr.q new file mode 100644 index 0000000..7be2b0e --- /dev/null +++ ql/src/test/queries/clientpositive/vectorized_join46_mr.q @@ -0,0 +1,228 @@ +set hive.cli.print.header=true; +set hive.vectorized.execution.enabled=true; +set hive.auto.convert.join=true; +set hive.strict.checks.cartesian.product=false; +set hive.join.emit.interval=2; + +-- SORT_QUERY_RESULTS + +CREATE TABLE test1 (key INT, value INT, col_1 STRING); +INSERT INTO test1 VALUES (NULL, NULL, 'None'), (98, NULL, 'None'), + (99, 0, 'Alice'), (99, 2, 'Mat'), (100, 1, 'Bob'), (101, 2, 'Car'); + +CREATE TABLE test2 (key INT, value INT, col_2 STRING); +INSERT INTO test2 VALUES (102, 2, 'Del'), (103, 2, 'Ema'), + (104, 3, 'Fli'), (105, NULL, 'None'); + + +-- Basic outer join +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value); + +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value); + +-- Conjunction with pred on multiple inputs and single inputs (left outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + AND test1.key between 100 and 102 + AND test2.key between 100 and 102); + +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + AND test1.key between 100 and 102 + AND test2.key between 100 and 102); + +-- Conjunction with pred on single inputs (left outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.key between 100 and 102 + AND test2.key between 100 and 102); + +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.key between 100 and 102 + AND test2.key between 100 and 102); + +-- Conjunction with pred on multiple inputs and none (left outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value AND true); + +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value AND true); + +-- Condition on one input (left outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.key between 100 and 102); + +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.key between 100 and 102); + +-- Disjunction with pred on multiple inputs and single inputs (left outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102); + +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102); + +-- Disjunction with pred on multiple inputs and left input (left outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102); + +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102); + +-- Disjunction with pred on multiple inputs and right input (left outer 
join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102); + +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102); + +-- Keys plus residual (left outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)); + +SELECT * +FROM test1 LEFT OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)); + +-- Disjunction with pred on multiple inputs and single inputs (right outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102); + +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102); + +-- Disjunction with pred on multiple inputs and left input (right outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102); + +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102); + +-- Disjunction with pred on multiple inputs and right input (right outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102); + +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102); + +-- Keys plus residual (right outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)); + +SELECT * +FROM test1 RIGHT OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)); + +-- Disjunction with pred on multiple inputs and single inputs (full outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102); + +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102); + +-- Disjunction with pred on multiple inputs and left input (full outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102); + +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102); + +-- Disjunction with pred on multiple inputs and right input (full outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102); + +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102); + +-- Keys plus residual (full outer join) +EXPLAIN VECTORIZATION DETAIL +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)); + +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)); diff --git 
ql/src/test/results/clientpositive/annotate_stats_join.q.out ql/src/test/results/clientpositive/annotate_stats_join.q.out index 9173e7a..5d09142 100644 --- ql/src/test/results/clientpositive/annotate_stats_join.q.out +++ ql/src/test/results/clientpositive/annotate_stats_join.q.out @@ -874,7 +874,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string), _col1 (type: int) 1 _col1 (type: string), _col0 (type: int) diff --git ql/src/test/results/clientpositive/auto_join18.q.out ql/src/test/results/clientpositive/auto_join18.q.out index 0039983..94c2661 100644 --- ql/src/test/results/clientpositive/auto_join18.q.out +++ ql/src/test/results/clientpositive/auto_join18.q.out @@ -88,7 +88,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/auto_join18_multi_distinct.q.out ql/src/test/results/clientpositive/auto_join18_multi_distinct.q.out index 4025f06..c233273 100644 --- ql/src/test/results/clientpositive/auto_join18_multi_distinct.q.out +++ ql/src/test/results/clientpositive/auto_join18_multi_distinct.q.out @@ -90,7 +90,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/auto_join6.q.out ql/src/test/results/clientpositive/auto_join6.q.out index 166ecda..e9a3a5f 100644 --- ql/src/test/results/clientpositive/auto_join6.q.out +++ ql/src/test/results/clientpositive/auto_join6.q.out @@ -81,7 +81,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/auto_join7.q.out ql/src/test/results/clientpositive/auto_join7.q.out index c8f7144..5853d3c 100644 --- ql/src/test/results/clientpositive/auto_join7.q.out +++ ql/src/test/results/clientpositive/auto_join7.q.out @@ -107,7 +107,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 Left Outer Join 0 to 2 keys: 0 _col0 (type: string) diff --git ql/src/test/results/clientpositive/beeline/smb_mapjoin_1.q.out ql/src/test/results/clientpositive/beeline/smb_mapjoin_1.q.out index e455524..f03c381 100644 --- ql/src/test/results/clientpositive/beeline/smb_mapjoin_1.q.out +++ ql/src/test/results/clientpositive/beeline/smb_mapjoin_1.q.out @@ -272,7 +272,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 2060 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) @@ -493,7 +493,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 2080 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) diff --git ql/src/test/results/clientpositive/beeline/smb_mapjoin_2.q.out ql/src/test/results/clientpositive/beeline/smb_mapjoin_2.q.out index 1dfacda..5d4468b 100644 --- ql/src/test/results/clientpositive/beeline/smb_mapjoin_2.q.out +++ ql/src/test/results/clientpositive/beeline/smb_mapjoin_2.q.out @@ -231,7 +231,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 2220 Basic stats: COMPLETE Column stats: NONE Sorted Merge 
Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) @@ -456,7 +456,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 2080 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) diff --git ql/src/test/results/clientpositive/beeline/smb_mapjoin_3.q.out ql/src/test/results/clientpositive/beeline/smb_mapjoin_3.q.out index cf4c744..1a0a0cf 100644 --- ql/src/test/results/clientpositive/beeline/smb_mapjoin_3.q.out +++ ql/src/test/results/clientpositive/beeline/smb_mapjoin_3.q.out @@ -230,7 +230,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 2220 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) @@ -453,7 +453,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 2060 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) diff --git ql/src/test/results/clientpositive/beeline/smb_mapjoin_7.q.out ql/src/test/results/clientpositive/beeline/smb_mapjoin_7.q.out index 4b1313d..16137dd 100644 --- ql/src/test/results/clientpositive/beeline/smb_mapjoin_7.q.out +++ ql/src/test/results/clientpositive/beeline/smb_mapjoin_7.q.out @@ -633,7 +633,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) diff --git ql/src/test/results/clientpositive/cbo_rp_join1.q.out ql/src/test/results/clientpositive/cbo_rp_join1.q.out index 03ca51f..1e25a6f 100644 --- ql/src/test/results/clientpositive/cbo_rp_join1.q.out +++ ql/src/test/results/clientpositive/cbo_rp_join1.q.out @@ -53,7 +53,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 filter predicates: 0 {(VALUE._col0 = 40)} 1 {(VALUE._col0 = 40)} @@ -155,7 +155,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 filter predicates: 0 {(VALUE._col0 = 40)} {(VALUE._col1 = 40)} 1 {(VALUE._col0 = 40)} @@ -257,7 +257,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 filter predicates: 0 {(VALUE._col0 = 40)} 1 {(VALUE._col0 = 40)} @@ -359,7 +359,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 filter predicates: 0 {(VALUE._col0 > 40)} {(VALUE._col1 > 50)} {(VALUE._col0 = VALUE._col1)} 1 {(VALUE._col0 > 40)} {(VALUE._col1 > 50)} {(VALUE._col0 = VALUE._col1)} diff --git ql/src/test/results/clientpositive/correlationoptimizer8.q.out ql/src/test/results/clientpositive/correlationoptimizer8.q.out index 34a9216..1a58dca 100644 --- ql/src/test/results/clientpositive/correlationoptimizer8.q.out +++ ql/src/test/results/clientpositive/correlationoptimizer8.q.out @@ -1081,7 +1081,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 UDFToDouble(_col0) (type: double) 1 UDFToDouble(_col0) (type: double) diff --git ql/src/test/results/clientpositive/fullouter_mapjoin_1.q.out ql/src/test/results/clientpositive/fullouter_mapjoin_1.q.out 
new file mode 100644 index 0000000..0ae9df9 --- /dev/null +++ ql/src/test/results/clientpositive/fullouter_mapjoin_1.q.out @@ -0,0 +1,176 @@ +PREHOOK: query: CREATE TABLE fullouter_long_big_1a(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1a +PREHOOK: query: CREATE TABLE fullouter_long_small_1a(key bigint, s_date date) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a(key bigint, s_date date) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1a +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 1 Data size: 205 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 205 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 1 Data size: 205 Basic stats: COMPLETE Column stats: NONE + TableScan + alias: s + Statistics: Num rows: 1 Data size: 1640 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 1640 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: 
_col0 (type: bigint) + Statistics: Num rows: 1 Data size: 1640 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 225 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 225 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 
+NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 diff --git ql/src/test/results/clientpositive/infer_join_preds.q.out ql/src/test/results/clientpositive/infer_join_preds.q.out index 6a4fa34..f28ca6c 100644 --- ql/src/test/results/clientpositive/infer_join_preds.q.out +++ ql/src/test/results/clientpositive/infer_join_preds.q.out @@ -458,7 +458,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/join18.q.out ql/src/test/results/clientpositive/join18.q.out index 3d5a90f..f64df58 100644 --- ql/src/test/results/clientpositive/join18.q.out +++ ql/src/test/results/clientpositive/join18.q.out @@ -87,7 +87,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/join18_multi_distinct.q.out ql/src/test/results/clientpositive/join18_multi_distinct.q.out index b064af2..d9fa1ec 100644 --- ql/src/test/results/clientpositive/join18_multi_distinct.q.out +++ ql/src/test/results/clientpositive/join18_multi_distinct.q.out @@ -89,7 +89,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/join45.q.out ql/src/test/results/clientpositive/join45.q.out index f2cca31..91f7259 100644 --- ql/src/test/results/clientpositive/join45.q.out +++ ql/src/test/results/clientpositive/join45.q.out @@ -1359,7 +1359,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -1464,7 +1464,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/join46.q.out ql/src/test/results/clientpositive/join46.q.out index 0847ca6..f05bcb1 100644 --- ql/src/test/results/clientpositive/join46.q.out +++ ql/src/test/results/clientpositive/join46.q.out @@ -1421,7 +1421,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1479,6 +1479,204 @@ PREHOOK: query: EXPLAIN SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 
(type: string) + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +101 2 Car 105 NULL None +101 2 Car 104 3 Fli +101 2 Car 103 2 Ema +101 2 Car 102 2 Del +100 1 Bob 105 NULL None +100 1 Bob 104 3 Fli +100 1 Bob 103 2 Ema +100 1 Bob 102 2 Del +99 2 Mat 103 2 Ema +99 2 Mat 102 2 Del +99 0 Alice 102 2 Del +98 NULL None 102 2 Del +NULL NULL None 102 2 Del +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + 
outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)} + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +101 2 Car 105 NULL None +101 2 Car 104 3 Fli +101 2 Car 103 2 Ema +101 2 Car 102 2 Del +100 1 Bob 105 NULL None +100 1 Bob 104 3 Fli +100 1 Bob 103 2 Ema +100 1 Bob 102 2 Del +99 2 Mat 103 2 Ema +99 2 Mat 102 2 Del +99 0 Alice NULL NULL NULL +98 NULL None NULL NULL NULL +NULL NULL None NULL NULL NULL +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value OR test1.key between 100 and 102) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN @@ -1520,7 +1718,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1617,7 +1815,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1665,19 +1863,18 @@ POSTHOOK: Input: default@test2 NULL NULL None 102 2 Del NULL NULL NULL 105 NULL None NULL NULL NULL 104 3 Fli +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - AND (test1.key between 100 and 102 - OR test2.key between 100 and 102)) + OR test2.key between 100 and 102) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - AND (test1.key between 100 and 102 - OR test2.key between 100 and 102)) + OR test2.key between 100 and 102) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1695,11 +1892,9 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) + sort order: 
Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) TableScan alias: test2 Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE @@ -1708,24 +1903,22 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) + sort order: Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col1 (type: int) + 0 + 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1737,11 +1930,11 @@ STAGE PLANS: Processor Tree: ListSink +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - AND (test1.key between 100 and 102 - OR test2.key between 100 and 102)) + OR test2.key between 100 and 102) PREHOOK: type: QUERY PREHOOK: Input: default@test1 PREHOOK: Input: default@test2 @@ -1749,21 +1942,461 @@ PREHOOK: Input: default@test2 POSTHOOK: query: SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - AND (test1.key between 100 and 102 - OR test2.key between 100 and 102)) + OR test2.key between 100 and 102) POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -98 NULL None NULL NULL NULL -NULL NULL None NULL NULL NULL -NULL NULL NULL 105 NULL None -99 0 Alice NULL NULL NULL -100 1 Bob NULL NULL NULL 101 2 Car 103 2 Ema 101 2 Car 102 2 Del +100 1 Bob 102 2 Del +99 2 Mat 103 2 Ema 99 2 Mat 102 2 Del +99 0 Alice 102 2 Del +98 NULL None 102 2 Del +NULL NULL None 102 2 Del +NULL NULL NULL 105 NULL None NULL NULL NULL 104 3 Fli +PREHOOK: query: EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column 
stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string) + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +98 NULL None NULL NULL NULL +NULL NULL None NULL NULL NULL +NULL NULL NULL 105 NULL None +99 0 Alice NULL NULL NULL +100 1 Bob NULL NULL NULL +101 2 Car 103 2 Ema +101 2 Car 102 2 Del +99 2 Mat 102 2 Del +NULL NULL NULL 104 3 Fli +PREHOOK: query: EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + 
key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string) + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +98 NULL None NULL NULL NULL +NULL NULL None NULL NULL NULL +NULL NULL NULL 105 NULL None +99 0 Alice NULL NULL NULL +100 1 Bob NULL NULL NULL +101 2 Car 103 2 Ema +101 2 Car 102 2 Del +99 2 Mat 102 2 Del +NULL NULL NULL 104 3 Fli +Warning: Shuffle Join JOIN[17][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: EXPLAIN +SELECT * +FROM ( + SELECT test1.key AS key1, test1.value AS value1, test1.col_1 AS col_1, + test2.key AS key2, test2.value AS value2, test2.col_2 AS col_2 + FROM test1 RIGHT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq1 +FULL OUTER JOIN ( + SELECT test1.key AS key3, test1.value AS value3, test1.col_1 AS col_3, + test2.key AS key4, test2.value AS value4, test2.col_2 AS col_4 + FROM test1 LEFT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq2 +ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * +FROM ( + SELECT test1.key AS key1, test1.value AS value1, test1.col_1 AS col_1, + test2.key AS key2, test2.value AS value2, test2.col_2 AS col_2 + FROM test1 RIGHT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR 
test2.key between 100 and 102)) + ) sq1 +FULL OUTER JOIN ( + SELECT test1.key AS key3, test1.value AS value3, test1.col_1 AS col_3, + test2.key AS key4, test2.value AS value4, test2.col_2 AS col_4 + FROM test1 LEFT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq2 +ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1, Stage-4 + Stage-4 is a root stage + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string) + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) + TableScan + Reduce Output Operator + sort order: + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + residual filter predicates: {(_col1 is null or (_col10 is null and (_col7 <> _col4)))} + Statistics: Num rows: 36 Data size: 768 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 36 Data size: 768 Basic stats: COMPLETE 
Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-4 + Map Reduce + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string) + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[17][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: SELECT * +FROM ( + SELECT test1.key AS key1, test1.value AS value1, test1.col_1 AS col_1, + test2.key AS key2, test2.value AS value2, test2.col_2 AS col_2 + FROM test1 RIGHT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq1 +FULL OUTER JOIN ( + SELECT test1.key AS key3, test1.value AS value3, test1.col_1 AS col_3, + test2.key AS key4, test2.value AS value4, test2.col_2 AS col_4 + FROM test1 LEFT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq2 +ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM ( + SELECT test1.key AS key1, test1.value AS value1, test1.col_1 AS col_1, + test2.key AS key2, test2.value AS value2, test2.col_2 AS col_2 + FROM test1 RIGHT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq1 +FULL OUTER JOIN ( + SELECT test1.key AS key3, test1.value AS value3, test1.col_1 AS col_3, + test2.key AS key4, test2.value AS 
value4, test2.col_2 AS col_4 + FROM test1 LEFT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq2 +ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +NULL NULL NULL 104 3 Fli 99 2 Mat 102 2 Del +NULL NULL NULL 104 3 Fli 101 2 Car 102 2 Del +NULL NULL NULL 104 3 Fli 101 2 Car 103 2 Ema +NULL NULL NULL 104 3 Fli 100 1 Bob NULL NULL NULL +NULL NULL NULL 104 3 Fli 99 0 Alice NULL NULL NULL +NULL NULL NULL 104 3 Fli NULL NULL None NULL NULL NULL +NULL NULL NULL 104 3 Fli 98 NULL None NULL NULL NULL +99 2 Mat 102 2 Del 100 1 Bob NULL NULL NULL +99 2 Mat 102 2 Del 99 0 Alice NULL NULL NULL +101 2 Car 102 2 Del 100 1 Bob NULL NULL NULL +101 2 Car 102 2 Del 99 0 Alice NULL NULL NULL +101 2 Car 103 2 Ema 100 1 Bob NULL NULL NULL +101 2 Car 103 2 Ema 99 0 Alice NULL NULL NULL +NULL NULL NULL 105 NULL None 99 2 Mat 102 2 Del +NULL NULL NULL 105 NULL None 101 2 Car 102 2 Del +NULL NULL NULL 105 NULL None 101 2 Car 103 2 Ema +NULL NULL NULL 105 NULL None 100 1 Bob NULL NULL NULL +NULL NULL NULL 105 NULL None 99 0 Alice NULL NULL NULL +NULL NULL NULL 105 NULL None NULL NULL None NULL NULL NULL +NULL NULL NULL 105 NULL None 98 NULL None NULL NULL NULL Warning: Shuffle Join JOIN[17][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -1874,7 +2507,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 diff --git ql/src/test/results/clientpositive/join47.q.out ql/src/test/results/clientpositive/join47.q.out index 4a13df8..67ae836 100644 --- ql/src/test/results/clientpositive/join47.q.out +++ ql/src/test/results/clientpositive/join47.q.out @@ -1341,7 +1341,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -1446,7 +1446,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/join6.q.out ql/src/test/results/clientpositive/join6.q.out index 978cc45..69818e8 100644 --- ql/src/test/results/clientpositive/join6.q.out +++ ql/src/test/results/clientpositive/join6.q.out @@ -81,7 +81,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/join7.q.out ql/src/test/results/clientpositive/join7.q.out index 91c4eb7..ac65f34 100644 --- ql/src/test/results/clientpositive/join7.q.out +++ ql/src/test/results/clientpositive/join7.q.out @@ -107,7 +107,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 Left Outer Join 0 to 2 keys: 0 _col0 (type: string) diff --git ql/src/test/results/clientpositive/join_filters_overlap.q.out ql/src/test/results/clientpositive/join_filters_overlap.q.out index db0b26d..c9079d7 100644 --- ql/src/test/results/clientpositive/join_filters_overlap.q.out +++ ql/src/test/results/clientpositive/join_filters_overlap.q.out @@ -726,7 +726,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 Left Outer Join 1 
to 2 Left Outer Join 0 to 3 filter mappings: diff --git ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out index 89b7169..4a10d24 100644 --- ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out +++ ql/src/test/results/clientpositive/llap/acid_no_buckets.q.out @@ -1135,10 +1135,10 @@ STAGE PLANS: selectExpressions: StringGroupColConcatStringScalar(col 1:string, val updated) -> 6:string Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4] + keyColumns: 4:struct native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 6, 2] + valueColumns: 0:string, 6:string, 2:string Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map Vectorization: @@ -1333,10 +1333,10 @@ STAGE PLANS: projectedOutputColumnNums: [4, 2, 3] Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4] + keyColumns: 4:struct native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2, 3] + valueColumns: 2:string, 3:string Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map Vectorization: @@ -1501,11 +1501,10 @@ STAGE PLANS: projectedOutputColumnNums: [] Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2, 3] + keyColumns: 0:string, 1:string, 2:string, 3:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 1, 2, 3] - valueColumnNums: [] + partitionColumns: 0:string, 1:string, 2:string, 3:string Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map Vectorization: @@ -1531,11 +1530,11 @@ STAGE PLANS: vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ds:string, 3:hr:string, 4:ROW__ID:struct] Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 3, 0, 1] + keyColumns: 2:string, 3:string, 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2, 3, 0, 1] - valueColumnNums: [4] + partitionColumns: 2:string, 3:string, 0:string, 1:string + valueColumns: 4:struct Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map Vectorization: @@ -1580,11 +1579,10 @@ STAGE PLANS: projectedOutputColumnNums: [] Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2, 3] + keyColumns: 0:string, 1:string, 2:string, 3:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true - partitionColumnNums: [0, 1, 2, 3] - valueColumnNums: [] + partitionColumns: 0:string, 1:string, 2:string, 3:string Select Vectorization: className: VectorSelectOperator native: true @@ -1614,6 +1612,9 @@ STAGE PLANS: className: VectorAppMasterEventOperator native: true Reducer 3 + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: @@ -1919,11 +1920,11 @@ STAGE PLANS: selectExpressions: StringGroupColConcatStringScalar(col 1:string, val updated) -> 6:string Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4] + keyColumns: 4:struct native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [5] - valueColumnNums: [0, 6, 2] + partitionColumns: 5:int + valueColumns: 0:string, 6:string, 2:string Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map Vectorization: @@ -2118,11 +2119,11 @@ STAGE PLANS: projectedOutputColumnNums: [4, 2, 3] Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4] + keyColumns: 4:struct native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [5] - valueColumnNums: [2, 3] + partitionColumns: 5:int + valueColumns: 2:string, 3:string Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map Vectorization: @@ -2288,11 +2289,10 @@ STAGE PLANS: projectedOutputColumnNums: [] Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2, 3] + keyColumns: 0:string, 1:string, 2:string, 3:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 1, 2, 3] - valueColumnNums: [] + partitionColumns: 0:string, 1:string, 2:string, 3:string Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map Vectorization: @@ -2318,11 +2318,11 @@ STAGE PLANS: vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ds:string, 3:hr:string, 4:ROW__ID:struct] Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 3, 0, 1] + keyColumns: 2:string, 3:string, 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2, 3, 0, 1] - valueColumnNums: [4] + partitionColumns: 2:string, 3:string, 0:string, 1:string + valueColumns: 4:struct Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map Vectorization: @@ -2367,11 +2367,10 @@ STAGE PLANS: projectedOutputColumnNums: [] Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2, 3] + keyColumns: 0:string, 
1:string, 2:string, 3:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 1, 2, 3] - valueColumnNums: [] + partitionColumns: 0:string, 1:string, 2:string, 3:string Select Vectorization: className: VectorSelectOperator native: true @@ -2401,6 +2400,9 @@ STAGE PLANS: className: VectorAppMasterEventOperator native: true Reducer 3 + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: diff --git ql/src/test/results/clientpositive/llap/auto_join_filters.q.out ql/src/test/results/clientpositive/llap/auto_join_filters.q.out index 540612d..25d9b7f 100644 --- ql/src/test/results/clientpositive/llap/auto_join_filters.q.out +++ ql/src/test/results/clientpositive/llap/auto_join_filters.q.out @@ -54,6 +54,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 4939870 +Warning: Shuffle Join MERGEJOIN[14][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4939870 PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 @@ -198,6 +208,42 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 4939870 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4939870 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 
+#### A masked pattern was here #### +4939870 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4939870 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4939870 PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 @@ -340,6 +386,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 4939870 +Warning: Shuffle Join MERGEJOIN[14][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4939870 PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 @@ -484,6 +540,42 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 4939870 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND 
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4939870
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4939870
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4939870
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4939870
 PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value)
 PREHOOK: type: QUERY
 PREHOOK: Input: default@myinput1
diff --git ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out
index 04da1f2..22876aa 100644
--- ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out
+++ ql/src/test/results/clientpositive/llap/auto_join_nulls.q.out
@@ -188,6 +188,42 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@myinput1
 #### A masked pattern was here ####
 4543526
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4543526
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4543526
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4543526
+PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value and a.key=b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value and a.key=b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@myinput1
+#### A masked pattern was here ####
+4543526
 PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value)
 PREHOOK: type: QUERY
 PREHOOK: Input: default@myinput1
diff --git ql/src/test/results/clientpositive/llap/correlationoptimizer1.q.out ql/src/test/results/clientpositive/llap/correlationoptimizer1.q.out
index b138a2d..1c6ebb3 100644
--- ql/src/test/results/clientpositive/llap/correlationoptimizer1.q.out
+++ ql/src/test/results/clientpositive/llap/correlationoptimizer1.q.out
@@ -2203,7 +2203,7 @@ STAGE PLANS:
             Reduce Operator Tree:
               Merge Join Operator
                 condition map:
-                     Outer Join 0 to 1
+                     Full Outer Join 0 to 1
                 keys:
                   0 _col0 (type: string)
                   1 _col0 (type: string)
@@ -2345,7 +2345,291 @@ STAGE PLANS:
             Reduce Operator Tree:
               Merge Join Operator
                 condition map:
-                     Outer Join 0 to 1
+                     Full Outer Join 0 to 1
+                keys:
+                  0 _col0 (type: string)
+                  1 _col0 (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 525 Data size: 3526 Basic stats: COMPLETE Column stats: COMPLETE
+                Group By Operator
+                  aggregations: count()
+                  keys: _col0 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 17 Data size: 308 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: string)
+                    Statistics: Num rows: 17 Data size: 308 Basic stats: COMPLETE Column stats: COMPLETE
+                    value expressions: _col1 (type: bigint)
+        Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 17 Data size: 308 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: hash(_col0) (type: int), hash(_col1) (type: int)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 17 Data size: 308 Basic stats: COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: sum(_col0), sum(_col1)
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col0 (type: bigint), _col1 (type: bigint)
+        Reducer 4 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0), sum(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
+FROM (SELECT x.key AS key, count(1) AS cnt
+      FROM src1 x FULL OUTER JOIN src y ON (x.key = y.key)
+      GROUP BY x.key) tmp
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
+FROM (SELECT x.key AS key, count(1) AS cnt
+      FROM src1 x FULL OUTER JOIN src y ON (x.key = y.key)
+      GROUP BY x.key) tmp
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+652447	510
+PREHOOK: query: EXPLAIN
+SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
+FROM (SELECT x.key AS key, count(1) AS cnt
+      FROM src1 x FULL OUTER JOIN src y ON (x.key = y.key)
+      GROUP BY x.key) tmp
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
+FROM (SELECT x.key AS key, count(1) AS cnt
+      FROM src1 x FULL OUTER JOIN src y ON (x.key = y.key)
+      GROUP BY x.key) tmp
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+        Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: x
+                  Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
+                      Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: y
+                  Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
+                      Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Full Outer Join 0 to 1
+                keys:
+                  0 _col0 (type: string)
+                  1 _col0 (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 525 Data size: 3526 Basic stats: COMPLETE Column stats: COMPLETE
+                Group By Operator
+                  aggregations: count()
+                  keys: _col0 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 17 Data size: 308 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: string)
+                    Statistics: Num rows: 17 Data size: 308 Basic stats: COMPLETE Column stats: COMPLETE
+                    value expressions: _col1 (type: bigint)
+        Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 17 Data size: 308 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: hash(_col0) (type: int), hash(_col1) (type: int)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 17 Data size: 308 Basic stats: COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: sum(_col0), sum(_col1)
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col0 (type: bigint), _col1 (type: bigint)
+        Reducer 4 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0), sum(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
+FROM (SELECT x.key AS key, count(1) AS cnt
+      FROM src1 x FULL OUTER JOIN src y ON (x.key = y.key)
+      GROUP BY x.key) tmp
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
+FROM (SELECT x.key AS key, count(1) AS cnt
+      FROM src1 x FULL OUTER JOIN src y ON (x.key = y.key)
+      GROUP BY x.key) tmp
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+652447	510
+PREHOOK: query: EXPLAIN
+SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
+FROM (SELECT x.key AS key, count(1) AS cnt
+      FROM src1 x FULL OUTER JOIN src y ON (x.key = y.key)
+      GROUP BY x.key) tmp
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
+FROM (SELECT x.key AS key, count(1) AS cnt
+      FROM src1 x FULL OUTER JOIN src y ON (x.key = y.key)
+      GROUP BY x.key) tmp
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+        Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: x
+                  Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
+                      Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 5 
+            Map Operator Tree:
+                TableScan
+                  alias: y
+                  Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      key expressions: _col0 (type: string)
+                      sort order: +
+                      Map-reduce partition columns: _col0 (type: string)
+                      Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Full Outer Join 0 to 1
                 keys:
                   0 _col0 (type: string)
                   1 _col0 (type: string)
diff --git ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out
index 64248e7..57faf1d 100644
--- ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out
+++ ql/src/test/results/clientpositive/llap/correlationoptimizer2.q.out
@@ -1093,7 +1093,7 @@ STAGE PLANS:
             Reduce Operator Tree:
               Merge Join Operator
                 condition map:
-                     Outer Join 0 to 1
+                     Full Outer Join 0 to 1
                 keys:
                   0 _col0 (type: string)
                   1 _col0 (type: string)
@@ -1263,7 +1263,7 @@ STAGE PLANS:
             Reduce Operator Tree:
               Merge Join Operator
                 condition map:
-                     Outer Join 0 to 1
+                     Full Outer Join 0 to 1
                 keys:
                   0 _col0 (type: string)
                   1 _col0 (type: string)
@@ -1339,20 +1339,18 @@ POSTHOOK: Input: default@src1
 #### A masked pattern was here ####
 12744278	500	652447	25
 PREHOOK: query: EXPLAIN
-SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
-FROM (SELECT a.key AS key, count(1) AS cnt
+SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2))
+FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2
       FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a
       FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b
-      ON (a.key = b.key)
-      GROUP BY a.key) tmp
+      ON (a.key = b.key)) tmp
 PREHOOK: type: QUERY
 POSTHOOK: query: EXPLAIN
-SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
-FROM (SELECT a.key AS key, count(1) AS cnt
+SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2))
+FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2
       FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a
       FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b
-      ON (a.key = b.key)
-      GROUP BY a.key) tmp
+      ON (a.key = b.key)) tmp
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
@@ -1364,140 +1362,126 @@ STAGE PLANS:
 #### A masked pattern was here ####
       Edges:
         Reducer 2 <- Map 1 (SIMPLE_EDGE)
-        Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE)
-        Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
-        Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
-        Reducer 7 <- Map 6 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE)
+        Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+        Reducer 6 <- Map 5 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
             Map Operator Tree:
                 TableScan
                   alias: x
-                  Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
                   Select Operator
-                    expressions: key (type: string)
-                    outputColumnNames: key
-                    Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+                    expressions: key (type: string), value (type: string)
+                    outputColumnNames: key, value
+                    Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
                     Group By Operator
+                      aggregations: count(value)
                       keys: key (type: string)
                       mode: hash
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: string)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: bigint)
             Execution mode: llap
             LLAP IO: no inputs
-        Map 6 
+        Map 5 
             Map Operator Tree:
                 TableScan
                   alias: y
-                  Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
                   Select Operator
-                    expressions: key (type: string)
-                    outputColumnNames: key
-                    Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+                    expressions: key (type: string), value (type: string)
+                    outputColumnNames: key, value
+                    Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
                     Group By Operator
+                      aggregations: count(value)
                       keys: key (type: string)
                       mode: hash
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE
                       Reduce Output Operator
                         key expressions: _col0 (type: string)
                         sort order: +
                         Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: bigint)
             Execution mode: llap
             LLAP IO: no inputs
         Reducer 2 
            Execution mode: llap
            Reduce Operator Tree:
              Group By Operator
+                aggregations: count(VALUE._col0)
                keys: KEY._col0 (type: string)
                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
                Reduce Output Operator
                  key expressions: _col0 (type: string)
                  sort order: +
                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
+                  value expressions: _col1 (type: bigint)
         Reducer 3 
            Execution mode: llap
            Reduce Operator Tree:
              Merge Join Operator
                condition map:
-                     Outer Join 0 to 1
+                     Full Outer Join 0 to 1
                keys:
                  0 _col0 (type: string)
                  1 _col0 (type: string)
-                outputColumnNames: _col0
-                Statistics: Num rows: 262 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE
-                Group By Operator
-                  aggregations: count()
-                  keys: _col0 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
-                    Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE
-                    value expressions: _col1 (type: bigint)
-        Reducer 4 
-            Execution mode: llap
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0)
-                keys: KEY._col0 (type: string)
-                mode: mergepartial
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 262 Data size: 1890 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
-                  expressions: hash(_col0) (type: int), hash(_col1) (type: int)
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE
+                  expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 262 Data size: 1890 Basic stats: COMPLETE Column stats: COMPLETE
                  Group By Operator
-                    aggregations: sum(_col0), sum(_col1)
+                    aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3)
                    mode: hash
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                    outputColumnNames: _col0, _col1, _col2, _col3
+                    Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
                    Reduce Output Operator
                      sort order: 
-                      Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
-                      value expressions: _col0 (type: bigint), _col1 (type: bigint)
-        Reducer 5 
+                      Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint)
+        Reducer 4 
            Execution mode: llap
            Reduce Operator Tree:
              Group By Operator
-                aggregations: sum(VALUE._col0), sum(VALUE._col1)
+                aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3)
                mode: mergepartial
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                  Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 7 
+        Reducer 6 
            Execution mode: llap
            Reduce Operator Tree:
              Group By Operator
+                aggregations: count(VALUE._col0)
                keys: KEY._col0 (type: string)
                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE
                Reduce Output Operator
                  key expressions: _col0 (type: string)
                  sort order: +
                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE
+                  value expressions: _col1 (type: bigint)
 
   Stage: Stage-0
     Fetch Operator
@@ -1505,42 +1489,38 @@ STAGE PLANS:
       Processor Tree:
         ListSink
 
-PREHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
-FROM (SELECT a.key AS key, count(1) AS cnt
+PREHOOK: query: SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2))
+FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2
       FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a
       FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b
-      ON (a.key = b.key)
-      GROUP BY a.key) tmp
+      ON (a.key = b.key)) tmp
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
 PREHOOK: Input: default@src1
 #### A masked pattern was here ####
-POSTHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
-FROM (SELECT a.key AS key, count(1) AS cnt
+POSTHOOK: query: SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2))
+FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2
       FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a
      FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b
-      ON (a.key = b.key)
-      GROUP BY a.key) tmp
+      ON (a.key = b.key)) tmp
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 POSTHOOK: Input: default@src1
 #### A masked pattern was here ####
-12744278	310
+12744278	500	652447	25
 PREHOOK: query: EXPLAIN
-SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
-FROM (SELECT a.key AS key, count(1) AS cnt
+SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2))
+FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2
       FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a
      FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b
-      ON (a.key = b.key)
-      GROUP BY a.key) tmp
+      ON (a.key = b.key)) tmp
 PREHOOK: type: QUERY
 POSTHOOK: query: EXPLAIN
-SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
-FROM (SELECT a.key AS key, count(1) AS cnt
+SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2))
+FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2
      FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a
      FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b
-      ON (a.key = b.key)
-      GROUP BY a.key) tmp
+      ON (a.key = b.key)) tmp
 POSTHOOK: type: QUERY
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
@@ -1552,128 +1532,300 @@ STAGE PLANS:
 #### A masked pattern was here ####
       Edges:
         Reducer 2 <- Map 1 (SIMPLE_EDGE)
-        Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE)
-        Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
-        Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
-        Reducer 7 <- Map 6 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE)
+        Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE)
+        Reducer 6 <- Map 5 (SIMPLE_EDGE)
 #### A masked pattern was here ####
       Vertices:
         Map 1 
            Map Operator Tree:
                TableScan
                  alias: x
-                  Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
-                    expressions: key (type: string)
-                    outputColumnNames: key
-                    Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+                    expressions: key (type: string), value (type: string)
+                    outputColumnNames: key, value
+                    Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE
                    Group By Operator
+                      aggregations: count(value)
                      keys: key (type: string)
                      mode: hash
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: string)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: bigint)
            Execution mode: llap
            LLAP IO: no inputs
-        Map 6 
+        Map 5 
            Map Operator Tree:
                TableScan
                  alias: y
-                  Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
-                    expressions: key (type: string)
-                    outputColumnNames: key
-                    Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+                    expressions: key (type: string), value (type: string)
+                    outputColumnNames: key, value
+                    Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE
                    Group By Operator
+                      aggregations: count(value)
                      keys: key (type: string)
                      mode: hash
-                      outputColumnNames: _col0
-                      Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+                      outputColumnNames: _col0, _col1
+                      Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        key expressions: _col0 (type: string)
                        sort order: +
                        Map-reduce partition columns: _col0 (type: string)
-                        Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+                        Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE
+                        value expressions: _col1 (type: bigint)
            Execution mode: llap
            LLAP IO: no inputs
         Reducer 2 
            Execution mode: llap
            Reduce Operator Tree:
              Group By Operator
+                aggregations: count(VALUE._col0)
                keys: KEY._col0 (type: string)
                mode: mergepartial
-                outputColumnNames: _col0
-                Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
                Reduce Output Operator
                  key expressions: _col0 (type: string)
                  sort order: +
                  Map-reduce partition columns: _col0 (type: string)
-                  Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 250 Data size: 23750 Basic stats: COMPLETE Column stats: COMPLETE
+                  value expressions: _col1 (type: bigint)
         Reducer 3 
            Execution mode: llap
            Reduce Operator Tree:
              Merge Join Operator
                condition map:
-                     Outer Join 0 to 1
+                     Full Outer Join 0 to 1
                keys:
                  0 _col0 (type: string)
                  1 _col0 (type: string)
-                outputColumnNames: _col0
-                Statistics: Num rows: 262 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE
-                Group By Operator
-                  aggregations: count()
-                  keys: _col0 (type: string)
-                  mode: hash
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE
-                  Reduce Output Operator
-                    key expressions: _col0 (type: string)
-                    sort order: +
-                    Map-reduce partition columns: _col0 (type: string)
-                    Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE
-                    value expressions: _col1 (type: bigint)
-        Reducer 4 
-            Execution mode: llap
-            Reduce Operator Tree:
-              Group By Operator
-                aggregations: count(VALUE._col0)
-                keys: KEY._col0 (type: string)
-                mode: mergepartial
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 262 Data size: 1890 Basic stats: COMPLETE Column stats: COMPLETE
                Select Operator
-                  expressions: hash(_col0) (type: int), hash(_col1) (type: int)
-                  outputColumnNames: _col0, _col1
-                  Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE
+                  expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int)
+                  outputColumnNames: _col0, _col1, _col2, _col3
+                  Statistics: Num rows: 262 Data size: 1890 Basic stats: COMPLETE Column stats: COMPLETE
                  Group By Operator
-                    aggregations: sum(_col0), sum(_col1)
+                    aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3)
                    mode: hash
-                    outputColumnNames: _col0, _col1
-                    Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                    outputColumnNames: _col0, _col1, _col2, _col3
+                    Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
                    Reduce Output Operator
                      sort order: 
-                      Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
-                      value expressions: _col0 (type: bigint), _col1 (type: bigint)
-        Reducer 5 
+                      Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint)
+        Reducer 4 
            Execution mode: llap
            Reduce Operator Tree:
              Group By Operator
-                aggregations: sum(VALUE._col0), sum(VALUE._col1)
+                aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3)
                mode: mergepartial
-                outputColumnNames: _col0, _col1
-                Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                outputColumnNames: _col0, _col1, _col2, _col3
+                Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
-                  Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-        Reducer 7 
+        Reducer 6 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE
+                  value expressions: _col1 (type: bigint)
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2))
+FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2
+      FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a
+      FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b
+      ON (a.key = b.key)) tmp
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2))
+FROM (SELECT a.key AS key1, a.cnt AS cnt1, b.key AS key2, b.cnt AS cnt2
+      FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a
+      FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b
+      ON (a.key = b.key)) tmp
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+12744278	500	652447	25
+PREHOOK: query: EXPLAIN
+SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
+FROM (SELECT a.key AS key, count(1) AS cnt
+      FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a
+      FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b
+      ON (a.key = b.key)
+      GROUP BY a.key) tmp
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
+FROM (SELECT a.key AS key, count(1) AS cnt
+      FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a
+      FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b
+      ON (a.key = b.key)
+      GROUP BY a.key) tmp
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE)
+        Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+        Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
+        Reducer 7 <- Map 6 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: x
+                  Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: key
+                    Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      keys: key (type: string)
+                      mode: hash
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 6 
+            Map Operator Tree:
+                TableScan
+                  alias: y
+                  Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: key
+                    Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      keys: key (type: string)
+                      mode: hash
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+        Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Full Outer Join 0 to 1
+                keys:
+                  0 _col0 (type: string)
+                  1 _col0 (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 262 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE
+                Group By Operator
+                  aggregations: count()
+                  keys: _col0 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: string)
+                    Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE
+                    value expressions: _col1 (type: bigint)
+        Reducer 4 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: hash(_col0) (type: int), hash(_col1) (type: int)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: sum(_col0), sum(_col1)
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col0 (type: bigint), _col1 (type: bigint)
+        Reducer 5 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0), sum(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 7 
             Execution mode: llap
             Reduce Operator Tree:
               Group By Operator
@@ -1715,6 +1867,932 @@ POSTHOOK: Input: default@src1
 #### A masked pattern was here ####
 12744278	310
 PREHOOK: query: EXPLAIN
+SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
+FROM (SELECT a.key AS key, count(1) AS cnt
+      FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a
+      FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b
+      ON (a.key = b.key)
+      GROUP BY a.key) tmp
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
+FROM (SELECT a.key AS key, count(1) AS cnt
+      FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a
+      FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b
+      ON (a.key = b.key)
+      GROUP BY a.key) tmp
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE)
+        Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+        Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
+        Reducer 7 <- Map 6 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: x
+                  Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: key
+                    Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      keys: key (type: string)
+                      mode: hash
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 6 
+            Map Operator Tree:
+                TableScan
+                  alias: y
+                  Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: key
+                    Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      keys: key (type: string)
+                      mode: hash
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+        Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Full Outer Join 0 to 1
+                keys:
+                  0 _col0 (type: string)
+                  1 _col0 (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 262 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE
+                Group By Operator
+                  aggregations: count()
+                  keys: _col0 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: string)
+                    Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE
+                    value expressions: _col1 (type: bigint)
+        Reducer 4 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: hash(_col0) (type: int), hash(_col1) (type: int)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: sum(_col0), sum(_col1)
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col0 (type: bigint), _col1 (type: bigint)
+        Reducer 5 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0), sum(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 7 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
+FROM (SELECT a.key AS key, count(1) AS cnt
+      FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a
+      FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b
+      ON (a.key = b.key)
+      GROUP BY a.key) tmp
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
+FROM (SELECT a.key AS key, count(1) AS cnt
+      FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a
+      FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b
+      ON (a.key = b.key)
+      GROUP BY a.key) tmp
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+12744278	310
+PREHOOK: query: EXPLAIN
+SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
+FROM (SELECT a.key AS key, count(1) AS cnt
+      FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a
+      FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b
+      ON (a.key = b.key)
+      GROUP BY a.key) tmp
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
+FROM (SELECT a.key AS key, count(1) AS cnt
+      FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a
+      FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b
+      ON (a.key = b.key)
+      GROUP BY a.key) tmp
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE)
+        Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+        Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
+        Reducer 7 <- Map 6 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: x
+                  Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: key
+                    Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      keys: key (type: string)
+                      mode: hash
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 6 
+            Map Operator Tree:
+                TableScan
+                  alias: y
+                  Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: key
+                    Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      keys: key (type: string)
+                      mode: hash
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+        Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Full Outer Join 0 to 1
+                keys:
+                  0 _col0 (type: string)
+                  1 _col0 (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 262 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE
+                Group By Operator
+                  aggregations: count()
+                  keys: _col0 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: string)
+                    Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE
+                    value expressions: _col1 (type: bigint)
+        Reducer 4 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: hash(_col0) (type: int), hash(_col1) (type: int)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: sum(_col0), sum(_col1)
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                      value expressions: _col0 (type: bigint), _col1 (type: bigint)
+        Reducer 5 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0), sum(VALUE._col1)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                  table:
+                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+        Reducer 7 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
+FROM (SELECT a.key AS key, count(1) AS cnt
+      FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a
+      FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b
+      ON (a.key = b.key)
+      GROUP BY a.key) tmp
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Input: default@src1
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
+FROM (SELECT a.key AS key, count(1) AS cnt
+      FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a
+      FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b
+      ON (a.key = b.key)
+      GROUP BY a.key) tmp
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@src1
+#### A masked pattern was here ####
+12744278	310
+PREHOOK: query: EXPLAIN
+SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
+FROM (SELECT a.key AS key, count(1) AS cnt
+      FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a
+      FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b
+      ON (a.key = b.key)
+      GROUP BY a.key) tmp
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN
+SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt))
+FROM (SELECT a.key AS key, count(1) AS cnt
+      FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a
+      FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b
+      ON (a.key = b.key)
+      GROUP BY a.key) tmp
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 7 (ONE_TO_ONE_EDGE)
+        Reducer 4 <- Reducer 3 (SIMPLE_EDGE)
+        Reducer 5 <- Reducer 4 (CUSTOM_SIMPLE_EDGE)
+        Reducer 7 <- Map 6 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: x
+                  Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: key
+                    Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      keys: key (type: string)
+                      mode: hash
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+            Execution mode: llap
+            LLAP IO: no inputs
+        Map 6 
+            Map Operator Tree:
+                TableScan
+                  alias: y
+                  Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: key (type: string)
+                    outputColumnNames: key
+                    Statistics: Num rows: 25 Data size: 2150 Basic stats: COMPLETE Column stats: COMPLETE
+                    Group By Operator
+                      keys: key (type: string)
+                      mode: hash
+                      outputColumnNames: _col0
+                      Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+                      Reduce Output Operator
+                        key expressions: _col0 (type: string)
+                        sort order: +
+                        Map-reduce partition columns: _col0 (type: string)
+                        Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE
+            Execution mode: llap
+            LLAP IO: no inputs
+        Reducer 2 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0
+                Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 250 Data size: 21750 Basic stats: COMPLETE Column stats: COMPLETE
+        Reducer 3 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Merge Join Operator
+                condition map:
+                     Full Outer Join 0 to 1
+                keys:
+                  0 _col0 (type: string)
+                  1 _col0 (type: string)
+                outputColumnNames: _col0
+                Statistics: Num rows: 262 Data size: 870 Basic stats: COMPLETE Column stats: COMPLETE
+                Group By Operator
+                  aggregations: count()
+                  keys: _col0 (type: string)
+                  mode: hash
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: string)
+                    sort order: +
+                    Map-reduce partition columns: _col0 (type: string)
+                    Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE
+                    value expressions: _col1 (type: bigint)
+        Reducer 4 
+            Execution mode: llap
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: count(VALUE._col0)
+                keys: KEY._col0 (type: string)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE
+                Select Operator
+                  expressions: hash(_col0) (type: int), hash(_col1) (type: int)
+                  outputColumnNames: _col0, _col1
+                  Statistics: Num rows: 17 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE
+                  Group By Operator
+                    aggregations: sum(_col0), sum(_col1)
+                    mode: hash
+                    outputColumnNames: _col0, _col1
+                    Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
+                    Reduce Output Operator
+                      sort order: 
+                      Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 5 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 7 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1032 Basic stats: COMPLETE Column stats: COMPLETE + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT a.key AS key, count(1) AS cnt + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key) + GROUP BY a.key) tmp +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT a.key AS key, count(1) AS cnt + FROM (SELECT x.key as key, count(x.value) AS cnt FROM src x group by x.key) a + FULL OUTER JOIN (SELECT y.key as key, count(y.value) AS cnt FROM src1 y group by y.key) b + ON (a.key = b.key) + GROUP BY a.key) tmp +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +12744278 310 +PREHOOK: query: EXPLAIN +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON (x.key = y.key)) a + JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by z.key) b + ON (a.key = b.key)) tmp +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON (x.key = y.key)) a + JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by z.key) b + ON (a.key = b.key)) tmp +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter 
Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Group By Operator + aggregations: count(value) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 30 Data size: 8070 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 30 Data size: 8070 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + value 
expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON (x.key = y.key)) a + JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by z.key) b + ON (a.key = b.key)) tmp +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON (x.key = y.key)) a + JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by z.key) b + ON (a.key = b.key)) tmp +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +1711763 3531902962 1711763 37 +PREHOOK: query: EXPLAIN +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON (x.key = y.key)) a + JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by z.key) b + ON (a.key = b.key)) tmp +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON (x.key = y.key)) a + JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by z.key) b + ON (a.key = b.key)) tmp +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (ONE_TO_ONE_EDGE), Reducer 6 (ONE_TO_ONE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) + Reducer 6 <- Map 5 (SIMPLE_EDGE) +#### A 
masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Group By Operator + aggregations: count(value) + keys: key (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col1, _col2 + Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col1 (type: string), _col2 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 40 Data size: 7000 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: string) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 30 Data size: 8070 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: hash(_col0) (type: int), hash(_col1) (type: int), hash(_col2) (type: int), hash(_col3) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 30 Data size: 8070 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: sum(_col0), sum(_col1), sum(_col2), sum(_col3) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 
Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1), sum(VALUE._col2), sum(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 6 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1128 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: bigint) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON (x.key = y.key)) a + JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by z.key) b + ON (a.key = b.key)) tmp +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@src1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) +FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 + FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON (x.key = y.key)) a + JOIN (SELECT z.key AS key, count(z.value) AS cnt FROM src1 z group by z.key) b + ON (a.key = b.key)) tmp +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@src1 +#### A masked pattern was here #### +1711763 3531902962 1711763 37 +PREHOOK: query: EXPLAIN SELECT SUM(HASH(key1)), SUM(HASH(cnt1)), SUM(HASH(key2)), SUM(HASH(cnt2)) FROM (SELECT a.key AS key1, a.val AS cnt1, b.key AS key2, b.cnt AS cnt2 FROM (SELECT x.key AS key, x.value AS val FROM src1 x JOIN src y ON (x.key = y.key)) a diff --git ql/src/test/results/clientpositive/llap/correlationoptimizer4.q.out ql/src/test/results/clientpositive/llap/correlationoptimizer4.q.out index 3c9b6fc..acafc0d 100644 --- ql/src/test/results/clientpositive/llap/correlationoptimizer4.q.out +++ ql/src/test/results/clientpositive/llap/correlationoptimizer4.q.out @@ -1605,8 +1605,8 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 - Outer Join 1 to 2 + Full Outer Join 0 to 1 + Full Outer Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) @@ -1767,8 +1767,332 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 - Outer Join 1 to 2 + Full Outer Join 0 to 1 + Full Outer 
Join 1 to 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0) (type: int), hash(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0), sum(_col1) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT y.key AS key, count(1) AS cnt + FROM T2 x FULL OUTER JOIN T1 y ON (x.key = y.key) FULL OUTER JOIN T3 z ON (y.key = z.key) + GROUP BY y.key) tmp +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT y.key AS key, count(1) AS cnt + FROM T2 x FULL OUTER JOIN T1 y ON (x.key = y.key) FULL OUTER JOIN T3 z ON (y.key = z.key) + GROUP BY y.key) tmp +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +21 14 +PREHOOK: query: EXPLAIN +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT y.key AS key, count(1) AS cnt + FROM T2 x FULL OUTER JOIN T1 y ON (x.key = y.key) FULL OUTER JOIN T3 z ON (y.key = z.key) + GROUP BY y.key) tmp +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT y.key AS key, count(1) AS cnt + FROM T2 x FULL OUTER JOIN T1 y ON (x.key = y.key) FULL OUTER JOIN T3 z ON (y.key = z.key) + GROUP BY y.key) tmp +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked 
pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: z + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + Full Outer Join 1 to 2 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + 2 _col0 (type: int) + outputColumnNames: _col1 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: int) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0) (type: int), hash(_col1) (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0), sum(_col1) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Group By 
Operator + aggregations: sum(VALUE._col0), sum(VALUE._col1) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT y.key AS key, count(1) AS cnt + FROM T2 x FULL OUTER JOIN T1 y ON (x.key = y.key) FULL OUTER JOIN T3 z ON (y.key = z.key) + GROUP BY y.key) tmp +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT y.key AS key, count(1) AS cnt + FROM T2 x FULL OUTER JOIN T1 y ON (x.key = y.key) FULL OUTER JOIN T3 z ON (y.key = z.key) + GROUP BY y.key) tmp +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +21 14 +PREHOOK: query: EXPLAIN +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT y.key AS key, count(1) AS cnt + FROM T2 x FULL OUTER JOIN T1 y ON (x.key = y.key) FULL OUTER JOIN T3 z ON (y.key = z.key) + GROUP BY y.key) tmp +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT SUM(HASH(tmp.key)), SUM(HASH(tmp.cnt)) +FROM (SELECT y.key AS key, count(1) AS cnt + FROM T2 x FULL OUTER JOIN T1 y ON (x.key = y.key) FULL OUTER JOIN T3 z ON (y.key = z.key) + GROUP BY y.key) tmp +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE), Map 6 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: x + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: y + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 6 + Map Operator Tree: + TableScan + alias: z + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data 
size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + Full Outer Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) diff --git ql/src/test/results/clientpositive/llap/cross_prod_1.q.out ql/src/test/results/clientpositive/llap/cross_prod_1.q.out index fd03fe5..a7d9a94 100644 --- ql/src/test/results/clientpositive/llap/cross_prod_1.q.out +++ ql/src/test/results/clientpositive/llap/cross_prod_1.q.out @@ -1971,7 +1971,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 diff --git ql/src/test/results/clientpositive/llap/explainuser_4.q.out ql/src/test/results/clientpositive/llap/explainuser_4.q.out index c4fdc18..c7544a4 100644 --- ql/src/test/results/clientpositive/llap/explainuser_4.q.out +++ ql/src/test/results/clientpositive/llap/explainuser_4.q.out @@ -270,7 +270,7 @@ Stage-0 <-Reducer 2 [SIMPLE_EDGE] llap SHUFFLE [RS_10] Map Join Operator [MAPJOIN_17] (rows=1501 width=215) - Conds:RS_6.KEY.reducesinkkey0=RS_7.KEY.reducesinkkey0(Inner),HybridGraceHashJoin:true,Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"] + Conds:RS_6.KEY.reducesinkkey0=RS_7.KEY.reducesinkkey0(Inner),DynamicPartitionHashJoin:true,HybridGraceHashJoin:true,Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"] <-Map 4 [CUSTOM_SIMPLE_EDGE] llap PARTITION_ONLY_SHUFFLE [RS_7] PartitionCols:_col2 @@ -351,7 +351,7 @@ Stage-0 Group By Operator [GBY_10] (rows=1 width=8) Output:["_col0"],aggregations:["count()"] Map Join Operator [MAPJOIN_19] (rows=1501 width=215) - Conds:RS_6.KEY.reducesinkkey0=RS_7.KEY.reducesinkkey0(Inner),HybridGraceHashJoin:true + Conds:RS_6.KEY.reducesinkkey0=RS_7.KEY.reducesinkkey0(Inner),DynamicPartitionHashJoin:true,HybridGraceHashJoin:true <-Map 4 [CUSTOM_SIMPLE_EDGE] llap PARTITION_ONLY_SHUFFLE [RS_7] PartitionCols:_col0 @@ -431,7 +431,7 @@ Stage-0 Group By Operator [GBY_10] (rows=1501 width=215) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 Map Join Operator [MAPJOIN_21] (rows=1501 width=215) - Conds:RS_6.KEY.reducesinkkey0=RS_7.KEY.reducesinkkey0(Inner),HybridGraceHashJoin:true,Output:["_col0"] + Conds:RS_6.KEY.reducesinkkey0=RS_7.KEY.reducesinkkey0(Inner),DynamicPartitionHashJoin:true,HybridGraceHashJoin:true,Output:["_col0"] <-Map 5 [CUSTOM_SIMPLE_EDGE] llap PARTITION_ONLY_SHUFFLE [RS_7] PartitionCols:_col0 diff --git ql/src/test/results/clientpositive/llap/fullouter_mapjoin_1_optimized.q.out ql/src/test/results/clientpositive/llap/fullouter_mapjoin_1_optimized.q.out new file mode 100644 index 0000000..7738924 --- /dev/null +++ ql/src/test/results/clientpositive/llap/fullouter_mapjoin_1_optimized.q.out @@ -0,0 +1,6285 @@ +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: 
default@fullouter_long_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: Lineage: fullouter_long_big_1a.key SIMPLE [(fullouter_long_big_1a_txt)fullouter_long_big_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: Lineage: fullouter_long_big_1a_nonull.key SIMPLE [(fullouter_long_big_1a_nonull_txt)fullouter_long_big_1a_nonull_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_txt 
+POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1a_txt +PREHOOK: query: CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_small_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_small_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: Lineage: fullouter_long_small_1a.key SIMPLE [(fullouter_long_small_1a_txt)fullouter_long_small_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ] +POSTHOOK: Lineage: fullouter_long_small_1a.s_date SIMPLE [(fullouter_long_small_1a_txt)fullouter_long_small_1a_txt.FieldSchema(name:s_date, type:date, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_small_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_small_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_nonull +POSTHOOK: Lineage: fullouter_long_small_1a_nonull.key SIMPLE [(fullouter_long_small_1a_nonull_txt)fullouter_long_small_1a_nonull_txt.FieldSchema(name:key, type:bigint, 
comment:null), ] +POSTHOOK: Lineage: fullouter_long_small_1a_nonull.s_date SIMPLE [(fullouter_long_small_1a_nonull_txt)fullouter_long_small_1a_nonull_txt.FieldSchema(name:s_date, type:date, comment:null), ] +PREHOOK: query: analyze table fullouter_long_big_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: query: analyze table fullouter_long_big_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Output: default@fullouter_long_big_1a +PREHOOK: query: analyze table fullouter_long_big_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Output: default@fullouter_long_big_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Output: default@fullouter_long_big_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +PREHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Output: default@fullouter_long_big_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a +PREHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: query: analyze table fullouter_long_small_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a +POSTHOOK: Output: default@fullouter_long_small_1a +PREHOOK: query: analyze table fullouter_long_small_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a +PREHOOK: Output: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a +POSTHOOK: Output: default@fullouter_long_small_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a_nonull +PREHOOK: Output: default@fullouter_long_small_1a_nonull +POSTHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +POSTHOOK: Output: default@fullouter_long_small_1a_nonull +PREHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a_nonull +PREHOOK: Output: 
default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +POSTHOOK: Output: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: 
Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage 
+ Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Execution mode: llap + LLAP IO: all inputs + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: llap + LLAP IO: all inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + fullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT 
b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### 
+-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 
+NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 
+NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +PREHOOK: query: CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1b +POSTHOOK: query: CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1b +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1b +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1b +PREHOOK: query: CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1b +POSTHOOK: query: CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1b +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1b +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1b +PREHOOK: query: analyze table fullouter_long_big_1b compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Output: default@fullouter_long_big_1b +POSTHOOK: query: analyze table fullouter_long_big_1b compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Output: default@fullouter_long_big_1b +PREHOOK: query: analyze table fullouter_long_big_1b compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Output: default@fullouter_long_big_1b +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1b compute 
statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Output: default@fullouter_long_big_1b +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1b compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1b +PREHOOK: Output: default@fullouter_long_small_1b +POSTHOOK: query: analyze table fullouter_long_small_1b compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1b +POSTHOOK: Output: default@fullouter_long_small_1b +PREHOOK: query: analyze table fullouter_long_small_1b compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1b +PREHOOK: Output: default@fullouter_long_small_1b +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1b compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1b +POSTHOOK: Output: default@fullouter_long_small_1b +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: smallint) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint) + 1 _col0 (type: smallint) + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: smallint), _col2 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: smallint), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: timestamp) + Reduce Output Operator + 
key expressions: _col0 (type: smallint) + sort order: + + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: timestamp) + Execution mode: llap + LLAP IO: no inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: smallint), VALUE._col1 (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint) + outputColumnNames: _col0 + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint) + 1 _col0 (type: smallint) + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + fullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: smallint), _col2 (type: timestamp) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +-25394 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +32030 32030 2101-09-09 07:35:05.145 +NULL -14172 1918-09-13 11:44:24.496926711 +NULL -14172 2355-01-14 23:23:34 +NULL -14172 2809-06-07 02:10:58 +NULL -15361 2219-09-15 20:15:03.000169887 +NULL -15361 2434-08-13 20:37:07.000172979 +NULL -15427 2023-11-09 19:31:21 +NULL -15427 2046-06-07 22:58:40.728 +NULL -15427 2355-01-08 12:34:11.617 +NULL -19167 2230-12-22 20:25:39.000242111 +NULL -19167 2319-08-26 11:07:11.268 +NULL -20517 2233-12-20 04:06:56.666522799 +NULL -20517 2774-06-23 12:04:06.5 +NULL -20824 2478-11-05 00:28:05 +NULL -22422 1949-03-13 00:07:53.075 +NULL -22422 2337-07-19 06:33:02.000353352 +NULL -22422 2982-12-28 06:30:26.000883228 +NULL -23117 2037-01-05 21:52:30.685952759 +NULL -24775 2035-03-26 08:11:23.375224153 +NULL -24775 2920-08-06 15:58:28.261059449 +NULL -26998 2268-08-04 12:48:11.848006292 +NULL -26998 2428-12-26 07:53:45.96925825 +NULL -26998 2926-07-18 09:02:46.077 +NULL -29600 2333-11-02 15:06:30 +NULL -30059 2269-05-04 21:23:44.000339209 +NULL -30059 2420-12-10 22:12:30 +NULL -30059 2713-10-13 09:28:49 +NULL -30306 
2619-05-24 10:35:58.000774018 +NULL -4279 2214-09-10 03:53:06 +NULL -4279 2470-08-12 11:21:14.000955747 +NULL -7373 2662-10-28 12:07:02.000526564 +NULL -7624 2219-12-03 17:07:19 +NULL -7624 2289-08-28 00:14:34 +NULL -7624 2623-03-20 03:18:45.00006465 +NULL -8087 2550-06-26 23:57:42.588007617 +NULL -8087 2923-07-02 11:40:26.115 +NULL -8435 2642-02-07 11:45:04.353231638 +NULL -8435 2834-12-06 16:38:18.901 +NULL -8624 2120-02-15 15:36:40.000758423 +NULL -8624 2282-03-28 07:58:16 +NULL -8624 2644-05-04 04:45:07.839 +NULL 10553 2168-05-05 21:10:59.000152113 +NULL 11232 2038-04-06 14:53:59 +NULL 11232 2507-01-27 22:04:22.49661421 +NULL 11232 2533-11-26 12:22:18 +NULL 13598 2421-05-20 14:18:31.000264698 +NULL 13598 2909-06-25 23:22:50 +NULL 14865 2079-10-06 16:54:35.117 +NULL 14865 2220-02-28 03:41:36 +NULL 14865 2943-03-21 00:42:10.505 +NULL 17125 2236-07-14 01:54:40.927230276 +NULL 17125 2629-11-15 15:34:52 +NULL 21181 2253-03-12 11:55:48.332 +NULL 21181 2434-02-20 00:46:29.633 +NULL 21436 2526-09-22 23:44:55 +NULL 21436 2696-05-08 05:19:24.112 +NULL 24870 2752-12-26 12:32:23.03685163 +NULL 2632 2561-12-15 15:42:27 +NULL 26484 1919-03-04 07:32:37.519 +NULL 26484 2953-03-10 02:05:26.508953676 +NULL 2748 2298-06-20 21:01:24 +NULL 2748 2759-02-13 18:04:36.000307355 +NULL 2748 2862-04-20 13:12:39.482805897 +NULL 29407 2385-12-14 06:03:39.597 +NULL 3198 2223-04-14 13:20:49 +NULL 3198 2428-06-13 16:21:33.955 +NULL 3198 2736-12-20 03:59:50.343550301 +NULL 4510 2293-01-17 13:47:41.00001006 +NULL 4510 2777-03-24 03:44:28.000169723 +NULL NULL 2124-05-07 15:01:19.021 +NULL NULL 2933-06-20 11:48:09.000839488 +NULL NULL 2971-08-07 12:02:11.000948152 +NULL NULL NULL +PREHOOK: query: CREATE TABLE fullouter_long_big_1c(key int, b_string string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1c +POSTHOOK: query: CREATE TABLE fullouter_long_big_1c(key int, b_string string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1c +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1c +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1c +PREHOOK: query: CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1c +POSTHOOK: query: CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1c +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1c +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c +POSTHOOK: type: LOAD +#### A masked pattern was here 
#### +POSTHOOK: Output: default@fullouter_long_small_1c +PREHOOK: query: analyze table fullouter_long_big_1c compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Output: default@fullouter_long_big_1c +POSTHOOK: query: analyze table fullouter_long_big_1c compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Output: default@fullouter_long_big_1c +PREHOOK: query: analyze table fullouter_long_big_1c compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Output: default@fullouter_long_big_1c +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1c compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Output: default@fullouter_long_big_1c +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1c compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1c +PREHOOK: Output: default@fullouter_long_small_1c +POSTHOOK: query: analyze table fullouter_long_small_1c compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1c +POSTHOOK: Output: default@fullouter_long_small_1c +PREHOOK: query: analyze table fullouter_long_small_1c compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1c +PREHOOK: Output: default@fullouter_long_small_1c +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1c compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1c +POSTHOOK: Output: default@fullouter_long_small_1c +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), b_string (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 
(type: int), _col3 (type: decimal(38,18)) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(38,18)) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(38,18)) + Execution mode: llap + LLAP IO: no inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + fullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: decimal(38,18)) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +-1437463633 JU NULL NULL +-1437463633 NULL NULL NULL +-1437463633 SOWDWMS NULL NULL +-1437463633 TKTKGVGFW NULL NULL +-1437463633 YYXPPCH NULL NULL +1725068083 MKSCCE NULL NULL +1928928239 AMKTIWQ NULL NULL +1928928239 NULL NULL NULL +1928928239 NULL NULL NULL +1928928239 VAQHVRI NULL NULL +NULL ABBZ NULL NULL +NULL 
NULL -1093006502 -69.556658280000000000 +NULL NULL -1197550983 -0.558879692200000000 +NULL NULL -1197550983 0.100000000000000000 +NULL NULL -1197550983 71852.833867441261300000 +NULL NULL -1250662632 -544.554649000000000000 +NULL NULL -1250662632 5454127198.951479000000000000 +NULL NULL -1250662632 93104.000000000000000000 +NULL NULL -1264372462 -6993985240226.000000000000000000 +NULL NULL -1264372462 -899.000000000000000000 +NULL NULL -1264372462 0.883000000000000000 +NULL NULL -1490239076 92253.232096000000000000 +NULL NULL -1681455031 -11105.372477000000000000 +NULL NULL -1681455031 -6.454300000000000000 +NULL NULL -1740848088 -9.157000000000000000 +NULL NULL -1740848088 0.506394259000000000 +NULL NULL -1740848088 901.441000000000000000 +NULL NULL -2048404259 -0.322296044625100000 +NULL NULL -2048404259 3939387044.100000000000000000 +NULL NULL -2123273881 -55.891980000000000000 +NULL NULL -2123273881 3.959000000000000000 +NULL NULL -243940373 -583.258000000000000000 +NULL NULL -243940373 -97176129669.654953000000000000 +NULL NULL -369457052 560.119078830904550000 +NULL NULL -369457052 7.700000000000000000 +NULL NULL -424713789 0.480000000000000000 +NULL NULL -466171792 0.000000000000000000 +NULL NULL -466171792 4227.534400000000000000 +NULL NULL -466171792 69.900000000000000000 +NULL NULL -477147437 6.000000000000000000 +NULL NULL -793950320 -0.100000000000000000 +NULL NULL -793950320 -16.000000000000000000 +NULL NULL -934092157 -7843850349.571300380000000000 +NULL NULL -99948814 -38076694.398100000000000000 +NULL NULL -99948814 -96386.438000000000000000 +NULL NULL 1039864870 0.700000000000000000 +NULL NULL 1039864870 94.040000000000000000 +NULL NULL 1039864870 987601.570000000000000000 +NULL NULL 1091836730 -5017.140000000000000000 +NULL NULL 1091836730 0.020000000000000000 +NULL NULL 1242586043 -4.000000000000000000 +NULL NULL 1242586043 -749975924224.630000000000000000 +NULL NULL 1242586043 71.148500000000000000 +NULL NULL 1479580778 92077343080.700000000000000000 +NULL NULL 150678276 -8278.000000000000000000 +NULL NULL 150678276 15989394.843600000000000000 +NULL NULL 1519948464 152.000000000000000000 +NULL NULL 1561921421 -5.405000000000000000 +NULL NULL 1561921421 53050.550000000000000000 +NULL NULL 1585021913 -5762331.066971120000000000 +NULL NULL 1585021913 607.227470000000000000 +NULL NULL 1585021913 745222.668089540000000000 +NULL NULL 1719049112 -7888197.000000000000000000 +NULL NULL 1738753776 -99817635066320.241600000000000000 +NULL NULL 1738753776 1525.280459649262000000 +NULL NULL 1755897735 -39.965207000000000000 +NULL NULL 1785750809 47443.115000000000000000 +NULL NULL 1801735854 -1760956929364.267000000000000000 +NULL NULL 1801735854 -438541294.700000000000000000 +NULL NULL 1816559437 -1035.700900000000000000 +NULL NULL 1909136587 -8610.078036935181000000 +NULL NULL 1909136587 181.076815359440000000 +NULL NULL 193709887 -0.566300000000000000 +NULL NULL 193709887 -19889.830000000000000000 +NULL NULL 193709887 0.800000000000000000 +NULL NULL 284554389 5.727146000000000000 +NULL NULL 294598722 -3542.600000000000000000 +NULL NULL 294598722 -9377326244.444000000000000000 +NULL NULL 448130683 -4302.485366846491000000 +NULL NULL 452719211 3020.293893074463600000 +NULL NULL 452719211 83003.437220000000000000 +NULL NULL 466567142 -58810.605860000000000000 +NULL NULL 466567142 -9763217822.129028000000000000 +NULL NULL 466567142 196.578529539858400000 +NULL NULL 560745412 678.250000000000000000 +NULL NULL 698032489 -330457.429262583900000000 +NULL NULL 891262439 
-0.040000000000000000 +NULL NULL 90660785 -4564.517185000000000000 +NULL NULL 90660785 12590.288613000000000000 +NULL NULL NULL 1.089120893565337000 +NULL NULL NULL 4.261652270000000000 +NULL NULL NULL 682070836.264960300000000000 +PREHOOK: query: CREATE TABLE fullouter_long_big_1d(key int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1d +POSTHOOK: query: CREATE TABLE fullouter_long_big_1d(key int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1d +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1d +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1d +PREHOOK: query: CREATE TABLE fullouter_long_small_1d(key int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1d +POSTHOOK: query: CREATE TABLE fullouter_long_small_1d(key int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1d +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1d +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1d +PREHOOK: query: analyze table fullouter_long_big_1d compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Output: default@fullouter_long_big_1d +POSTHOOK: query: analyze table fullouter_long_big_1d compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Output: default@fullouter_long_big_1d +PREHOOK: query: analyze table fullouter_long_big_1d compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Output: default@fullouter_long_big_1d +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1d compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Output: default@fullouter_long_big_1d +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1d compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1d +PREHOOK: Output: default@fullouter_long_small_1d +POSTHOOK: query: analyze table fullouter_long_small_1d compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1d +POSTHOOK: Output: default@fullouter_long_small_1d +PREHOOK: query: analyze table fullouter_long_small_1d compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1d +PREHOOK: Output: default@fullouter_long_small_1d +#### A masked 
pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1d compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1d +POSTHOOK: Output: default@fullouter_long_small_1d +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 5 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Map Join Operator + condition map: + Full Outer Join 0 to 1 
+ keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 5 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + fullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +-1780951928 NULL +-2038654700 -2038654700 +-670834064 NULL +-702028721 NULL +-702028721 NULL +-702028721 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +NULL -1003639073 +NULL -1014271154 +NULL -1036083124 +NULL -1210744742 +NULL -1323620496 +NULL -1379355738 +NULL -1712018127 +NULL -1792852276 +NULL -1912571616 +NULL -497171161 +NULL -683339273 +NULL -707688773 +NULL -747044796 +NULL -894799664 +NULL -932176731 +NULL 103640700 +NULL 1164387380 +NULL 1372592319 +NULL 1431997749 +NULL 1614287784 +NULL 162858059 +NULL 1635405412 +NULL 1685473722 +NULL 1780951928 +NULL 1825107160 +NULL 1831520491 +NULL 1840266070 +NULL 1997943409 +NULL 2119085509 +NULL 246169862 +NULL 260588085 +NULL 41376947 +NULL 436878811 +NULL 533298451 +NULL 670834064 +NULL 699007128 +NULL 699863556 +NULL NULL +NULL NULL +NULL NULL +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: 
default@fullouter_multikey_big_1a +POSTHOOK: Lineage: fullouter_multikey_big_1a.key0 SIMPLE [(fullouter_multikey_big_1a_txt)fullouter_multikey_big_1a_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1a.key1 SIMPLE [(fullouter_multikey_big_1a_txt)fullouter_multikey_big_1a_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Lineage: fullouter_multikey_big_1a_nonull.key0 SIMPLE [(fullouter_multikey_big_1a_nonull_txt)fullouter_multikey_big_1a_nonull_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1a_nonull.key1 SIMPLE [(fullouter_multikey_big_1a_nonull_txt)fullouter_multikey_big_1a_nonull_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_small_1a_txt +PREHOOK: 
query: CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_small_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_small_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a +POSTHOOK: Lineage: fullouter_multikey_small_1a.key0 SIMPLE [(fullouter_multikey_small_1a_txt)fullouter_multikey_small_1a_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1a.key1 SIMPLE [(fullouter_multikey_small_1a_txt)fullouter_multikey_small_1a_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull +POSTHOOK: Lineage: fullouter_multikey_small_1a_nonull.key0 SIMPLE [(fullouter_multikey_small_1a_nonull_txt)fullouter_multikey_small_1a_nonull_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1a_nonull.key1 SIMPLE [(fullouter_multikey_small_1a_nonull_txt)fullouter_multikey_small_1a_nonull_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: analyze table fullouter_multikey_big_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Output: default@fullouter_multikey_big_1a +POSTHOOK: query: analyze table fullouter_multikey_big_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Output: default@fullouter_multikey_big_1a +PREHOOK: 
query: analyze table fullouter_multikey_big_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Output: default@fullouter_multikey_big_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_big_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Output: default@fullouter_multikey_big_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull +POSTHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull +PREHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_small_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a +PREHOOK: Output: default@fullouter_multikey_small_1a +POSTHOOK: query: analyze table fullouter_multikey_small_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1a +POSTHOOK: Output: default@fullouter_multikey_small_1a +PREHOOK: query: analyze table fullouter_multikey_small_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a +PREHOOK: Output: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_small_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1a +POSTHOOK: Output: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull +POSTHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull +PREHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM 
fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Execution mode: llap + LLAP IO: all inputs + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: 
KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col0, _col1 + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + fullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 
-1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +NULL NULL NULL NULL +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 
22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL 
NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL NULL +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 
963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_big_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1b +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_big_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1b +POSTHOOK: Lineage: fullouter_multikey_big_1b.key0 SIMPLE [(fullouter_multikey_big_1b_txt)fullouter_multikey_big_1b_txt.FieldSchema(name:key0, type:timestamp, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1b.key1 SIMPLE [(fullouter_multikey_big_1b_txt)fullouter_multikey_big_1b_txt.FieldSchema(name:key1, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1b.key2 SIMPLE [(fullouter_multikey_big_1b_txt)fullouter_multikey_big_1b_txt.FieldSchema(name:key2, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18)) +row format delimited fields 
terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_small_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1b +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_small_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1b +POSTHOOK: Lineage: fullouter_multikey_small_1b.key0 SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:key0, type:timestamp, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1b.key1 SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:key1, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1b.key2 SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:key2, type:string, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1b.s_decimal SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:s_decimal, type:decimal(38,18), comment:null), ] +PREHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b_txt +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +POSTHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b_txt +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +PREHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b_txt +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b_txt +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1b_txt +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +POSTHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1b_txt +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +PREHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: 
default@fullouter_multikey_small_1b_txt +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1b_txt +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + 1 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + input vertices: + 1 Map 5 + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: timestamp), _col4 (type: smallint), _col5 (type: string), _col6 (type: decimal(38,18)) + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Execution mode: llap + LLAP IO: all inputs + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: 
decimal(38,18)) + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(38,18)) + Execution mode: llap + LLAP IO: all inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), VALUE._col0 (type: string), VALUE._col1 (type: timestamp), VALUE._col2 (type: smallint), VALUE._col3 (type: string), VALUE._col4 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + 1 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + input vertices: + 1 Map 5 + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + fullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: timestamp), _col4 (type: smallint), _col5 (type: string), _col6 (type: decimal(38,18)) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b +PREHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b +POSTHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 21635 ANCO NULL NULL NULL NULL +2082-07-14 04:00:40.695380469 12556 NCYBDW NULL NULL NULL NULL +2093-04-10 23:36:54.846 1446 GHZVPWFO NULL NULL NULL NULL +2093-04-10 23:36:54.846 28996 Q NULL NULL NULL NULL +2093-04-10 23:36:54.846 NULL NULL NULL NULL NULL NULL +2188-06-04 15:03:14.963259704 9468 AAA 2188-06-04 15:03:14.963259704 9468 AAA 2.754963520000000000 +2299-11-15 16:41:30.401 -31077 NCYBDW NULL NULL NULL NULL +2306-06-21 
11:02:00.143124239 -6909 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 1446 NULL NULL NULL NULL NULL +2608-02-23 23:44:02.546440891 26184 NCYBDW NULL NULL NULL NULL +2686-05-23 07:46:46.565832918 13212 NCYBDW 2686-05-23 07:46:46.565832918 13212 NCYBDW -917116793.400000000000000000 +2686-05-23 07:46:46.565832918 NULL GHZVPWFO NULL NULL NULL NULL +2898-10-01 22:27:02.000871113 10361 NCYBDW NULL NULL NULL NULL +NULL -6909 NULL NULL NULL NULL NULL +NULL 21635 ANCO NULL NULL NULL NULL +NULL NULL CCWYD NULL NULL NULL NULL +NULL NULL NULL 1905-04-20 13:42:24.000469776 2638 KAUUFF 7.000000000000000000 +NULL NULL NULL 1919-06-20 00:16:50.611028595 20223 ZKBC -23.000000000000000000 +NULL NULL NULL 1931-12-04 11:13:47.269597392 23196 HVJCQMTQL -9697532.899400000000000000 +NULL NULL NULL 1941-10-16 02:19:35.000423663 -24459 AO -821445414.457971200000000000 +NULL NULL NULL 1957-02-01 14:00:28.000548421 -16085 ZVEUKC -2312.814900000000000000 +NULL NULL NULL 1957-03-06 09:57:31 -26373 NXLNNSO 2.000000000000000000 +NULL NULL NULL 1980-09-13 19:57:15 NULL M 57650.772300000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 7506645.953700000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 98790.713907420831000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -22419 LOTLS 342.372604022858400000 +NULL NULL NULL 2038-10-12 09:15:33.000539653 -19598 YKNIAJW -642807895924.660000000000000000 +NULL NULL NULL 2044-05-02 07:00:03.35 -8751 ZSMB -453797242.029791752000000000 +NULL NULL NULL 2071-07-21 20:02:32.000250697 2638 NRUV -66198.351092000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 5.000000000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 726945733.419300000000000000 +NULL NULL NULL 2075-10-25 20:32:40.000792874 NULL NULL 226612651968.360760000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR -394.086700000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR 67892053.023760940000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV -85184687349898.892000000000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 0.439686100000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 482.538341135921900000 +NULL NULL NULL 2105-01-04 16:27:45 23100 ZSMB -83.232800000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 NULL -9784.820000000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 UANGISEXR -5996.306000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -1515597428.000000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -4016.960800000000000000 +NULL NULL NULL 2201-07-05 17:22:06.084206844 -24459 UBGT 1.506948328200000000 +NULL NULL NULL 2238-05-17 19:27:25.519 20223 KQCM -0.010950000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ -0.261490000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ 37.728800000000000000 +NULL NULL NULL 2266-09-26 06:27:29.000284762 20223 EDYJJN 14.000000000000000000 +NULL NULL NULL 2301-06-03 17:16:19 15332 ZVEUKC 0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 -13125 JFYW 6.086657000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR 1279917802.420000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 12587 OPW -4.594895040000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T -0.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 
2720.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 61.302000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G -4319470286240016.300000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G 975.000000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 30285 GSJPSIYOU 0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO -0.435500000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO 0.713517473350000000 +NULL NULL NULL 2309-01-15 12:43:49 22821 ZMY 40.900000000000000000 +NULL NULL NULL 2332-06-14 07:02:42.32 -26373 XFFFDTQ 56845106806308.900000000000000000 +NULL NULL NULL 2333-07-28 09:59:26 23196 RKSK 37872288434740893.500000000000000000 +NULL NULL NULL 2338-02-12 09:30:07 20223 CTH -6154.763054000000000000 +NULL NULL NULL 2340-12-15 05:15:17.133588982 23663 HHTP 33383.800000000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 74179461.880493000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 92.150000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -32.460000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -472.000000000000000000 +NULL NULL NULL 2391-01-17 15:28:37.00045143 16160 ZVEUKC 771355639420297.133000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB -5151598.347000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB 0.767183260000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -162.950000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -9926693851.000000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR 0.400000000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR -769088.176482000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR 93262.914526611000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -9575827.553960000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -991.436050000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB 8694.890000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB -582687.000000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB 67.417990000000000000 +NULL NULL NULL 2467-05-11 06:04:13.426693647 23196 EIBSDASR -8.554888380100000000 +NULL NULL NULL 2480-10-02 09:31:37.000770961 -26373 NBN -5875.519725200000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ -49.512190000000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ 0.445800000000000000 +NULL NULL NULL 2512-10-06 03:03:03 13195 CRJ 14.000000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X -922.695158410700000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X 761196.522000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 24313 QBHUG -8423.151573236000000000 +NULL NULL NULL 2512-10-06 03:03:03 32099 ARNZ -0.410000000000000000 +NULL NULL NULL 2525-05-12 15:59:35 -24459 SAVRGA 53106747151.863300000000000000 +NULL NULL NULL 2535-03-01 05:04:49.000525883 23663 ALIQKNXHE -0.166569100000000000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 41.774515077866460000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 6.801285170800000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -12923 PPTJPFR 5.400000000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -17786 HYEGQ -84.169614329419000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 7362887891522.378200000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 749563668434009.650000000000000000 +NULL NULL NULL 2668-06-25 07:12:37.000970744 2638 TJE -2.779682700000000000 +NULL NULL NULL 
2688-02-06 20:58:42.000947837 20223 PAIY 67661.735000000000000000 +NULL NULL NULL 2743-12-27 05:16:19.000573579 -12914 ZVEUKC -811984611.517849700000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 368.000000000000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 60.602579700000000000 +NULL NULL NULL 2808-07-09 02:10:11.928498854 -19598 FHFX 0.300000000000000000 +NULL NULL NULL 2829-06-04 08:01:47.836 22771 ZVEUKC 94317.753180000000000000 +NULL NULL NULL 2861-05-27 07:13:01.000848622 -19598 WKPXNLXS 29399.000000000000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -4244.926206619000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -9951044.000000000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC -56082455.033918000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC 57.621752577880370000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 51.732330327300000000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 6370.000000000000000000 +NULL NULL NULL 2898-12-18 03:37:17 -24459 MHNBXPBM 14.236693562384810000 +NULL NULL NULL 2913-07-17 15:06:58.041 -10206 NULL -0.200000000000000000 +NULL NULL NULL 2938-12-21 23:35:59.498 29362 ZMY 0.880000000000000000 +NULL NULL NULL 2957-05-07 10:41:46 20223 OWQT -586953.153681000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF -96.300000000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF 2.157765900000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -18138 VDPN 8924831210.427680190000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -32485 AGEPWWLJF -48431309405.652522000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -375994644577.315257000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -81.000000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ 9.178000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 14500 WXLTRFQP -23.819800000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 6689 TFGVOGPJF -0.010000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -27394351.300000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -5.751278023000000000 +NULL NULL NULL NULL -12914 ZVEUKC 221.000000000000000000 +NULL NULL NULL NULL NULL NULL -2.400000000000000000 +NULL NULL NULL NULL NULL NULL -2207.300000000000000000 +NULL NULL NULL NULL NULL NULL NULL +PREHOOK: query: CREATE TABLE fullouter_string_big_1a_txt(key string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a_txt(key string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt +PREHOOK: type: 
CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a +POSTHOOK: Lineage: fullouter_string_big_1a.key SIMPLE [(fullouter_string_big_1a_txt)fullouter_string_big_1a_txt.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull_txt(key string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull_txt(key string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a_nonull +POSTHOOK: Lineage: fullouter_string_big_1a_nonull.key SIMPLE [(fullouter_string_big_1a_nonull_txt)fullouter_string_big_1a_nonull_txt.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_small_1a_txt 
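
Note on the fixture-staging pattern repeated throughout this output: each test table is first created as a comma-delimited text table, loaded with LOAD DATA LOCAL INPATH, copied into ORC via CTAS, and then given basic and column statistics, presumably so that size estimates are available when the planner designates the small side of the FULL OUTER MapJoin. A minimal sketch of that same sequence follows; the fixture name my_fixture is a hypothetical placeholder, while every statement form is taken verbatim from the output above:

    -- stage raw text data (hypothetical fixture name my_fixture)
    CREATE TABLE my_fixture_txt(key string)
    row format delimited fields terminated by ',';
    LOAD DATA LOCAL INPATH '../../data/files/my_fixture.txt' OVERWRITE INTO TABLE my_fixture_txt;
    -- convert to ORC for the actual join tests
    CREATE TABLE my_fixture STORED AS ORC AS SELECT * FROM my_fixture_txt;
    -- gather table-level and column-level statistics for the optimizer
    analyze table my_fixture compute statistics;
    analyze table my_fixture compute statistics for columns;
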
+PREHOOK: query: CREATE TABLE fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_small_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_small_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a +POSTHOOK: Lineage: fullouter_string_small_1a.key SIMPLE [(fullouter_string_small_1a_txt)fullouter_string_small_1a_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a.s_date SIMPLE [(fullouter_string_small_1a_txt)fullouter_string_small_1a_txt.FieldSchema(name:s_date, type:date, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a.s_timestamp SIMPLE [(fullouter_string_small_1a_txt)fullouter_string_small_1a_txt.FieldSchema(name:s_timestamp, type:timestamp, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_small_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_small_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_small_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_small_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a_nonull +POSTHOOK: Lineage: fullouter_string_small_1a_nonull.key SIMPLE [(fullouter_string_small_1a_nonull_txt)fullouter_string_small_1a_nonull_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a_nonull.s_date SIMPLE [(fullouter_string_small_1a_nonull_txt)fullouter_string_small_1a_nonull_txt.FieldSchema(name:s_date, type:date, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a_nonull.s_timestamp SIMPLE [(fullouter_string_small_1a_nonull_txt)fullouter_string_small_1a_nonull_txt.FieldSchema(name:s_timestamp, type:timestamp, comment:null), ] +PREHOOK: query: analyze table 
fullouter_string_big_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Output: default@fullouter_string_big_1a +POSTHOOK: query: analyze table fullouter_string_big_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Output: default@fullouter_string_big_1a +PREHOOK: query: analyze table fullouter_string_big_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Output: default@fullouter_string_big_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_big_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Output: default@fullouter_string_big_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Output: default@fullouter_string_big_1a_nonull +POSTHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Output: default@fullouter_string_big_1a_nonull +PREHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Output: default@fullouter_string_big_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Output: default@fullouter_string_big_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_string_small_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a +PREHOOK: Output: default@fullouter_string_small_1a +POSTHOOK: query: analyze table fullouter_string_small_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a +POSTHOOK: Output: default@fullouter_string_small_1a +PREHOOK: query: analyze table fullouter_string_small_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a +PREHOOK: Output: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_small_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a +POSTHOOK: Output: default@fullouter_string_small_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a_nonull +PREHOOK: Output: default@fullouter_string_small_1a_nonull +POSTHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +POSTHOOK: Output: default@fullouter_string_small_1a_nonull +PREHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a_nonull +PREHOOK: Output: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics for columns 
+POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +POSTHOOK: Output: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Execution mode: llap + LLAP IO: all inputs + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: llap + LLAP IO: all inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + 
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + fullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL 
NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### 
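
Note on the plan shape shown in the EXPLAIN output above: in both broadcast plans (the multi-key timestamp/smallint/string join and the single-key string join), the Full Outer Join is evaluated in two passes against the broadcast Map 5 input, once in Map 1 and once in Reducer 4, where the second Map Join Operator carries fullOuterIntersect: true, and Union 2 merges the two halves; the bigint-keyed plan further below instead uses a reduce-side join tagged DynamicPartitionHashJoin: true over CUSTOM_SIMPLE_EDGEs. All of these runs have vectorization disabled (each plan reports hive.vectorized.execution.enabled IS false). A speculative snippet for inspecting the vectorized variant of the same string-key query, assuming only a session where these tables already exist and the flag is flipped:

    -- enable vectorized execution, then re-explain the query from above
    set hive.vectorized.execution.enabled=true;
    EXPLAIN VECTORIZATION DETAIL
    SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key
    order by b.key;
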
+FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 
10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: bigint) + 1 KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 4 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 59 Data size: 3775 
Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 
7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: bigint) + 1 KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 4 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor 
Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on 
stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: bigint) + 1 KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 4 + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL 
+-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map 
Operator Tree: + TableScan + alias: s + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: bigint) + 1 KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 4 + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL 
-4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: smallint) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: smallint), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: timestamp) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: smallint) + 1 KEY.reducesinkkey0 
(type: smallint) + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 4 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: smallint), _col2 (type: timestamp) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: smallint), VALUE._col1 (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +-25394 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +32030 32030 2101-09-09 07:35:05.145 +NULL -14172 1918-09-13 11:44:24.496926711 +NULL -14172 2355-01-14 23:23:34 +NULL -14172 2809-06-07 02:10:58 +NULL -15361 2219-09-15 20:15:03.000169887 +NULL -15361 2434-08-13 20:37:07.000172979 +NULL -15427 2023-11-09 19:31:21 +NULL -15427 2046-06-07 22:58:40.728 +NULL -15427 2355-01-08 12:34:11.617 +NULL -19167 2230-12-22 20:25:39.000242111 +NULL -19167 2319-08-26 11:07:11.268 +NULL -20517 2233-12-20 04:06:56.666522799 +NULL -20517 2774-06-23 12:04:06.5 +NULL -20824 2478-11-05 00:28:05 +NULL -22422 1949-03-13 00:07:53.075 +NULL -22422 2337-07-19 06:33:02.000353352 +NULL -22422 2982-12-28 06:30:26.000883228 +NULL -23117 2037-01-05 21:52:30.685952759 +NULL -24775 2035-03-26 08:11:23.375224153 +NULL -24775 2920-08-06 15:58:28.261059449 +NULL -26998 2268-08-04 12:48:11.848006292 +NULL -26998 2428-12-26 07:53:45.96925825 +NULL -26998 2926-07-18 09:02:46.077 +NULL -29600 2333-11-02 15:06:30 +NULL -30059 2269-05-04 21:23:44.000339209 +NULL -30059 2420-12-10 22:12:30 +NULL -30059 2713-10-13 09:28:49 +NULL -30306 2619-05-24 10:35:58.000774018 +NULL -4279 2214-09-10 03:53:06 +NULL -4279 2470-08-12 11:21:14.000955747 +NULL -7373 2662-10-28 12:07:02.000526564 +NULL -7624 2219-12-03 17:07:19 +NULL -7624 2289-08-28 00:14:34 +NULL -7624 2623-03-20 03:18:45.00006465 +NULL -8087 2550-06-26 23:57:42.588007617 +NULL -8087 2923-07-02 11:40:26.115 +NULL -8435 2642-02-07 11:45:04.353231638 +NULL -8435 2834-12-06 16:38:18.901 +NULL -8624 2120-02-15 15:36:40.000758423 +NULL -8624 2282-03-28 07:58:16 +NULL -8624 2644-05-04 
04:45:07.839 +NULL 10553 2168-05-05 21:10:59.000152113 +NULL 11232 2038-04-06 14:53:59 +NULL 11232 2507-01-27 22:04:22.49661421 +NULL 11232 2533-11-26 12:22:18 +NULL 13598 2421-05-20 14:18:31.000264698 +NULL 13598 2909-06-25 23:22:50 +NULL 14865 2079-10-06 16:54:35.117 +NULL 14865 2220-02-28 03:41:36 +NULL 14865 2943-03-21 00:42:10.505 +NULL 17125 2236-07-14 01:54:40.927230276 +NULL 17125 2629-11-15 15:34:52 +NULL 21181 2253-03-12 11:55:48.332 +NULL 21181 2434-02-20 00:46:29.633 +NULL 21436 2526-09-22 23:44:55 +NULL 21436 2696-05-08 05:19:24.112 +NULL 24870 2752-12-26 12:32:23.03685163 +NULL 2632 2561-12-15 15:42:27 +NULL 26484 1919-03-04 07:32:37.519 +NULL 26484 2953-03-10 02:05:26.508953676 +NULL 2748 2298-06-20 21:01:24 +NULL 2748 2759-02-13 18:04:36.000307355 +NULL 2748 2862-04-20 13:12:39.482805897 +NULL 29407 2385-12-14 06:03:39.597 +NULL 3198 2223-04-14 13:20:49 +NULL 3198 2428-06-13 16:21:33.955 +NULL 3198 2736-12-20 03:59:50.343550301 +NULL 4510 2293-01-17 13:47:41.00001006 +NULL 4510 2777-03-24 03:44:28.000169723 +NULL NULL 2124-05-07 15:01:19.021 +NULL NULL 2933-06-20 11:48:09.000839488 +NULL NULL 2971-08-07 12:02:11.000948152 +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), b_string (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(38,18)) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: int) + 1 KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0, _col1, 
_col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: decimal(38,18)) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +-1437463633 JU NULL NULL +-1437463633 NULL NULL NULL +-1437463633 SOWDWMS NULL NULL +-1437463633 TKTKGVGFW NULL NULL +-1437463633 YYXPPCH NULL NULL +1725068083 MKSCCE NULL NULL +1928928239 AMKTIWQ NULL NULL +1928928239 NULL NULL NULL +1928928239 NULL NULL NULL +1928928239 VAQHVRI NULL NULL +NULL ABBZ NULL NULL +NULL NULL -1093006502 -69.556658280000000000 +NULL NULL -1197550983 -0.558879692200000000 +NULL NULL -1197550983 0.100000000000000000 +NULL NULL -1197550983 71852.833867441261300000 +NULL NULL -1250662632 -544.554649000000000000 +NULL NULL -1250662632 5454127198.951479000000000000 +NULL NULL -1250662632 93104.000000000000000000 +NULL NULL -1264372462 -6993985240226.000000000000000000 +NULL NULL -1264372462 -899.000000000000000000 +NULL NULL -1264372462 0.883000000000000000 +NULL NULL -1490239076 92253.232096000000000000 +NULL NULL -1681455031 -11105.372477000000000000 +NULL NULL -1681455031 -6.454300000000000000 +NULL NULL -1740848088 -9.157000000000000000 +NULL NULL -1740848088 0.506394259000000000 +NULL NULL -1740848088 901.441000000000000000 +NULL NULL -2048404259 -0.322296044625100000 +NULL NULL -2048404259 3939387044.100000000000000000 +NULL NULL -2123273881 -55.891980000000000000 +NULL NULL -2123273881 3.959000000000000000 +NULL NULL -243940373 -583.258000000000000000 +NULL NULL -243940373 -97176129669.654953000000000000 +NULL NULL -369457052 560.119078830904550000 +NULL NULL -369457052 7.700000000000000000 +NULL NULL -424713789 0.480000000000000000 +NULL NULL -466171792 0.000000000000000000 +NULL NULL -466171792 4227.534400000000000000 +NULL NULL -466171792 69.900000000000000000 +NULL NULL -477147437 6.000000000000000000 +NULL NULL -793950320 -0.100000000000000000 +NULL NULL -793950320 
-16.000000000000000000 +NULL NULL -934092157 -7843850349.571300380000000000 +NULL NULL -99948814 -38076694.398100000000000000 +NULL NULL -99948814 -96386.438000000000000000 +NULL NULL 1039864870 0.700000000000000000 +NULL NULL 1039864870 94.040000000000000000 +NULL NULL 1039864870 987601.570000000000000000 +NULL NULL 1091836730 -5017.140000000000000000 +NULL NULL 1091836730 0.020000000000000000 +NULL NULL 1242586043 -4.000000000000000000 +NULL NULL 1242586043 -749975924224.630000000000000000 +NULL NULL 1242586043 71.148500000000000000 +NULL NULL 1479580778 92077343080.700000000000000000 +NULL NULL 150678276 -8278.000000000000000000 +NULL NULL 150678276 15989394.843600000000000000 +NULL NULL 1519948464 152.000000000000000000 +NULL NULL 1561921421 -5.405000000000000000 +NULL NULL 1561921421 53050.550000000000000000 +NULL NULL 1585021913 -5762331.066971120000000000 +NULL NULL 1585021913 607.227470000000000000 +NULL NULL 1585021913 745222.668089540000000000 +NULL NULL 1719049112 -7888197.000000000000000000 +NULL NULL 1738753776 -99817635066320.241600000000000000 +NULL NULL 1738753776 1525.280459649262000000 +NULL NULL 1755897735 -39.965207000000000000 +NULL NULL 1785750809 47443.115000000000000000 +NULL NULL 1801735854 -1760956929364.267000000000000000 +NULL NULL 1801735854 -438541294.700000000000000000 +NULL NULL 1816559437 -1035.700900000000000000 +NULL NULL 1909136587 -8610.078036935181000000 +NULL NULL 1909136587 181.076815359440000000 +NULL NULL 193709887 -0.566300000000000000 +NULL NULL 193709887 -19889.830000000000000000 +NULL NULL 193709887 0.800000000000000000 +NULL NULL 284554389 5.727146000000000000 +NULL NULL 294598722 -3542.600000000000000000 +NULL NULL 294598722 -9377326244.444000000000000000 +NULL NULL 448130683 -4302.485366846491000000 +NULL NULL 452719211 3020.293893074463600000 +NULL NULL 452719211 83003.437220000000000000 +NULL NULL 466567142 -58810.605860000000000000 +NULL NULL 466567142 -9763217822.129028000000000000 +NULL NULL 466567142 196.578529539858400000 +NULL NULL 560745412 678.250000000000000000 +NULL NULL 698032489 -330457.429262583900000000 +NULL NULL 891262439 -0.040000000000000000 +NULL NULL 90660785 -4564.517185000000000000 +NULL NULL 90660785 12590.288613000000000000 +NULL NULL NULL 1.089120893565337000 +NULL NULL NULL 4.261652270000000000 +NULL NULL NULL 682070836.264960300000000000 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + Map Join 
Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 5 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: no inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 5 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + fullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +-1780951928 NULL +-2038654700 -2038654700 +-670834064 NULL +-702028721 NULL +-702028721 NULL +-702028721 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +NULL -1003639073 +NULL -1014271154 +NULL -1036083124 +NULL -1210744742 +NULL -1323620496 +NULL -1379355738 +NULL -1712018127 +NULL -1792852276 +NULL 
-1912571616 +NULL -497171161 +NULL -683339273 +NULL -707688773 +NULL -747044796 +NULL -894799664 +NULL -932176731 +NULL 103640700 +NULL 1164387380 +NULL 1372592319 +NULL 1431997749 +NULL 1614287784 +NULL 162858059 +NULL 1635405412 +NULL 1685473722 +NULL 1780951928 +NULL 1825107160 +NULL 1831520491 +NULL 1840266070 +NULL 1997943409 +NULL 2119085509 +NULL 246169862 +NULL 260588085 +NULL 41376947 +NULL 436878811 +NULL 533298451 +NULL 670834064 +NULL 699007128 +NULL 699863556 +NULL NULL +NULL NULL +NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: string) + 1 KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, 
_col3 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp 
FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: string) + 1 KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a +#### A 
masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num 
rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: string) + 1 KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 
08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE 
+ value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: string) + 1 KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL 
NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + 1 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reducer 3 + Execution mode: llap + Reduce 
Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 
-276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +NULL NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + 1 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort 
order: ++ + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 
20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + 1 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 99 Data size: 792 Basic stats: 
COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL 
NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + 1 
KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 
628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort 
order: +++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(38,18)) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: string) + 1 KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + input vertices: + 1 Map 4 + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: timestamp), _col4 (type: smallint), _col5 (type: string), _col6 (type: decimal(38,18)) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), VALUE._col0 (type: string), VALUE._col1 (type: timestamp), VALUE._col2 (type: smallint), VALUE._col3 (type: string), VALUE._col4 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b +PREHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b +POSTHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 21635 ANCO NULL NULL NULL NULL +2082-07-14 04:00:40.695380469 12556 NCYBDW NULL NULL NULL NULL +2093-04-10 23:36:54.846 1446 GHZVPWFO NULL NULL NULL NULL +2093-04-10 23:36:54.846 28996 Q NULL NULL NULL NULL +2093-04-10 23:36:54.846 NULL NULL NULL NULL NULL NULL +2188-06-04 15:03:14.963259704 9468 AAA 2188-06-04 15:03:14.963259704 9468 AAA 2.754963520000000000 +2299-11-15 16:41:30.401 -31077 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 -6909 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 1446 NULL NULL NULL NULL NULL +2608-02-23 23:44:02.546440891 26184 NCYBDW NULL NULL NULL 
NULL +2686-05-23 07:46:46.565832918 13212 NCYBDW 2686-05-23 07:46:46.565832918 13212 NCYBDW -917116793.400000000000000000 +2686-05-23 07:46:46.565832918 NULL GHZVPWFO NULL NULL NULL NULL +2898-10-01 22:27:02.000871113 10361 NCYBDW NULL NULL NULL NULL +NULL -6909 NULL NULL NULL NULL NULL +NULL 21635 ANCO NULL NULL NULL NULL +NULL NULL CCWYD NULL NULL NULL NULL +NULL NULL NULL 1905-04-20 13:42:24.000469776 2638 KAUUFF 7.000000000000000000 +NULL NULL NULL 1919-06-20 00:16:50.611028595 20223 ZKBC -23.000000000000000000 +NULL NULL NULL 1931-12-04 11:13:47.269597392 23196 HVJCQMTQL -9697532.899400000000000000 +NULL NULL NULL 1941-10-16 02:19:35.000423663 -24459 AO -821445414.457971200000000000 +NULL NULL NULL 1957-02-01 14:00:28.000548421 -16085 ZVEUKC -2312.814900000000000000 +NULL NULL NULL 1957-03-06 09:57:31 -26373 NXLNNSO 2.000000000000000000 +NULL NULL NULL 1980-09-13 19:57:15 NULL M 57650.772300000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 7506645.953700000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 98790.713907420831000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -22419 LOTLS 342.372604022858400000 +NULL NULL NULL 2038-10-12 09:15:33.000539653 -19598 YKNIAJW -642807895924.660000000000000000 +NULL NULL NULL 2044-05-02 07:00:03.35 -8751 ZSMB -453797242.029791752000000000 +NULL NULL NULL 2071-07-21 20:02:32.000250697 2638 NRUV -66198.351092000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 5.000000000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 726945733.419300000000000000 +NULL NULL NULL 2075-10-25 20:32:40.000792874 NULL NULL 226612651968.360760000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR -394.086700000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR 67892053.023760940000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV -85184687349898.892000000000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 0.439686100000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 482.538341135921900000 +NULL NULL NULL 2105-01-04 16:27:45 23100 ZSMB -83.232800000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 NULL -9784.820000000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 UANGISEXR -5996.306000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -1515597428.000000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -4016.960800000000000000 +NULL NULL NULL 2201-07-05 17:22:06.084206844 -24459 UBGT 1.506948328200000000 +NULL NULL NULL 2238-05-17 19:27:25.519 20223 KQCM -0.010950000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ -0.261490000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ 37.728800000000000000 +NULL NULL NULL 2266-09-26 06:27:29.000284762 20223 EDYJJN 14.000000000000000000 +NULL NULL NULL 2301-06-03 17:16:19 15332 ZVEUKC 0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 -13125 JFYW 6.086657000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR 1279917802.420000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 12587 OPW -4.594895040000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T -0.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 2720.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 61.302000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G -4319470286240016.300000000000000000 +NULL NULL NULL 
2304-12-15 15:31:16 15090 G 975.000000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 30285 GSJPSIYOU 0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO -0.435500000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO 0.713517473350000000 +NULL NULL NULL 2309-01-15 12:43:49 22821 ZMY 40.900000000000000000 +NULL NULL NULL 2332-06-14 07:02:42.32 -26373 XFFFDTQ 56845106806308.900000000000000000 +NULL NULL NULL 2333-07-28 09:59:26 23196 RKSK 37872288434740893.500000000000000000 +NULL NULL NULL 2338-02-12 09:30:07 20223 CTH -6154.763054000000000000 +NULL NULL NULL 2340-12-15 05:15:17.133588982 23663 HHTP 33383.800000000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 74179461.880493000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 92.150000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -32.460000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -472.000000000000000000 +NULL NULL NULL 2391-01-17 15:28:37.00045143 16160 ZVEUKC 771355639420297.133000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB -5151598.347000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB 0.767183260000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -162.950000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -9926693851.000000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR 0.400000000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR -769088.176482000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR 93262.914526611000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -9575827.553960000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -991.436050000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB 8694.890000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB -582687.000000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB 67.417990000000000000 +NULL NULL NULL 2467-05-11 06:04:13.426693647 23196 EIBSDASR -8.554888380100000000 +NULL NULL NULL 2480-10-02 09:31:37.000770961 -26373 NBN -5875.519725200000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ -49.512190000000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ 0.445800000000000000 +NULL NULL NULL 2512-10-06 03:03:03 13195 CRJ 14.000000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X -922.695158410700000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X 761196.522000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 24313 QBHUG -8423.151573236000000000 +NULL NULL NULL 2512-10-06 03:03:03 32099 ARNZ -0.410000000000000000 +NULL NULL NULL 2525-05-12 15:59:35 -24459 SAVRGA 53106747151.863300000000000000 +NULL NULL NULL 2535-03-01 05:04:49.000525883 23663 ALIQKNXHE -0.166569100000000000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 41.774515077866460000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 6.801285170800000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -12923 PPTJPFR 5.400000000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -17786 HYEGQ -84.169614329419000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 7362887891522.378200000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 749563668434009.650000000000000000 +NULL NULL NULL 2668-06-25 07:12:37.000970744 2638 TJE -2.779682700000000000 +NULL NULL NULL 2688-02-06 20:58:42.000947837 20223 PAIY 67661.735000000000000000 +NULL NULL NULL 2743-12-27 05:16:19.000573579 -12914 ZVEUKC -811984611.517849700000000000 +NULL NULL NULL 2759-11-26 
22:19:55.410967136 -27454 ZMY 368.000000000000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 60.602579700000000000 +NULL NULL NULL 2808-07-09 02:10:11.928498854 -19598 FHFX 0.300000000000000000 +NULL NULL NULL 2829-06-04 08:01:47.836 22771 ZVEUKC 94317.753180000000000000 +NULL NULL NULL 2861-05-27 07:13:01.000848622 -19598 WKPXNLXS 29399.000000000000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -4244.926206619000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -9951044.000000000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC -56082455.033918000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC 57.621752577880370000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 51.732330327300000000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 6370.000000000000000000 +NULL NULL NULL 2898-12-18 03:37:17 -24459 MHNBXPBM 14.236693562384810000 +NULL NULL NULL 2913-07-17 15:06:58.041 -10206 NULL -0.200000000000000000 +NULL NULL NULL 2938-12-21 23:35:59.498 29362 ZMY 0.880000000000000000 +NULL NULL NULL 2957-05-07 10:41:46 20223 OWQT -586953.153681000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF -96.300000000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF 2.157765900000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -18138 VDPN 8924831210.427680190000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -32485 AGEPWWLJF -48431309405.652522000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -375994644577.315257000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -81.000000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ 9.178000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 14500 WXLTRFQP -23.819800000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 6689 TFGVOGPJF -0.010000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -27394351.300000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -5.751278023000000000 +NULL NULL NULL NULL -12914 ZVEUKC 221.000000000000000000 +NULL NULL NULL NULL NULL NULL -2.400000000000000000 +NULL NULL NULL NULL NULL NULL -2207.300000000000000000 +NULL NULL NULL NULL NULL NULL NULL diff --git ql/src/test/results/clientpositive/llap/join46.q.out ql/src/test/results/clientpositive/llap/join46.q.out index ecb34d1..f78e9c5 100644 --- ql/src/test/results/clientpositive/llap/join46.q.out +++ ql/src/test/results/clientpositive/llap/join46.q.out @@ -1631,7 +1631,122 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON 
(test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +NULL NULL None 102 2 Del +98 NULL None 102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +100 1 Bob 102 2 Del +100 1 Bob 103 2 Ema +100 1 Bob 104 3 Fli +100 1 Bob 105 NULL None +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +101 2 Car 104 3 Fli +101 2 Car 105 NULL None +Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 keys: 0 1 @@ -1744,7 +1859,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1800,13 +1915,13 @@ PREHOOK: query: EXPLAIN SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - OR test2.key between 100 and 102) + OR test1.key between 100 and 102) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - OR test2.key between 100 and 102) + OR test1.key between 100 and 102) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1855,12 +1970,12 @@ STAGE PLANS: Reduce 
Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)} + residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)} Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false @@ -1880,7 +1995,7 @@ Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reduce PREHOOK: query: SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - OR test2.key between 100 and 102) + OR test1.key between 100 and 102) PREHOOK: type: QUERY PREHOOK: Input: default@test1 PREHOOK: Input: default@test2 @@ -1888,34 +2003,36 @@ PREHOOK: Input: default@test2 POSTHOOK: query: SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - OR test2.key between 100 and 102) + OR test1.key between 100 and 102) POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -NULL NULL None 102 2 Del -98 NULL None 102 2 Del -99 0 Alice 102 2 Del +NULL NULL None NULL NULL NULL +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL 99 2 Mat 102 2 Del 99 2 Mat 103 2 Ema 100 1 Bob 102 2 Del +100 1 Bob 103 2 Ema +100 1 Bob 104 3 Fli +100 1 Bob 105 NULL None 101 2 Car 102 2 Del 101 2 Car 103 2 Ema -NULL NULL NULL 104 3 Fli -NULL NULL NULL 105 NULL None +101 2 Car 104 3 Fli +101 2 Car 105 NULL None +Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: EXPLAIN SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - AND (test1.key between 100 and 102 - OR test2.key between 100 and 102)) + OR test2.key between 100 and 102) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - AND (test1.key between 100 and 102 - OR test2.key between 100 and 102)) + OR test2.key between 100 and 102) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1926,7 +2043,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1939,11 +2056,9 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) + sort order: Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col2 (type: string) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -1956,11 +2071,9 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) + sort order: Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col2 (type: string) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs Reducer 2 
@@ -1968,16 +2081,16 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col1 (type: int) + 0 + 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 10 Data size: 1711 Basic stats: COMPLETE Column stats: COMPLETE + residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 1711 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1989,11 +2102,11 @@ STAGE PLANS: Processor Tree: ListSink +Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - AND (test1.key between 100 and 102 - OR test2.key between 100 and 102)) + OR test2.key between 100 and 102) PREHOOK: type: QUERY PREHOOK: Input: default@test1 PREHOOK: Input: default@test2 @@ -2001,21 +2114,579 @@ PREHOOK: Input: default@test2 POSTHOOK: query: SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - AND (test1.key between 100 and 102 - OR test2.key between 100 and 102)) + OR test2.key between 100 and 102) POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -NULL NULL NULL 105 NULL None -NULL NULL None NULL NULL NULL -98 NULL None NULL NULL NULL -99 0 Alice NULL NULL NULL -100 1 Bob NULL NULL NULL +NULL NULL None 102 2 Del +98 NULL None 102 2 Del +99 0 Alice 102 2 Del 99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +100 1 Bob 102 2 Del 101 2 Car 102 2 Del 101 2 Car 103 2 Ema NULL NULL NULL 104 3 Fli +NULL NULL NULL 105 NULL None +Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 
380 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +NULL NULL None 102 2 Del +98 NULL None 102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +100 1 Bob 102 2 Del +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +NULL NULL NULL 104 3 Fli +NULL NULL NULL 105 NULL None +PREHOOK: query: EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: test2 + Statistics: Num 
rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 10 Data size: 1711 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 1711 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +NULL NULL NULL 105 NULL None +NULL NULL None NULL NULL NULL +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +100 1 Bob NULL NULL NULL +99 2 Mat 102 2 Del +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +NULL NULL NULL 104 3 Fli +PREHOOK: query: EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: string) + Execution 
mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 10 Data size: 1711 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 1711 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +NULL NULL NULL 105 NULL None +NULL NULL None NULL NULL NULL +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +100 1 Bob NULL NULL NULL +99 2 Mat 102 2 Del +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +NULL NULL NULL 104 3 Fli +Warning: Shuffle Join MERGEJOIN[26][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +PREHOOK: query: EXPLAIN +SELECT * +FROM ( + SELECT test1.key AS key1, test1.value AS value1, test1.col_1 AS col_1, + test2.key AS key2, test2.value AS value2, test2.col_2 AS col_2 + FROM test1 RIGHT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq1 +FULL OUTER JOIN ( + SELECT test1.key AS key3, test1.value AS value3, test1.col_1 AS col_3, + test2.key AS key4, test2.value AS value4, test2.col_2 AS col_4 + FROM test1 LEFT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq2 +ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * +FROM ( + SELECT test1.key AS key1, test1.value AS value1, test1.col_1 AS col_1, + test2.key AS key2, test2.value AS value2, test2.col_2 AS col_2 + FROM test1 RIGHT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq1 +FULL OUTER JOIN ( + SELECT 
test1.key AS key3, test1.value AS value3, test1.col_1 AS col_3, + test2.key AS key4, test2.value AS value4, test2.col_2 AS col_4 + FROM test1 LEFT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq2 +ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE), Reducer 4 (CUSTOM_SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: string) + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 5 + Map Operator Tree: + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: string) + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, 
_col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + residual filter predicates: {(_col1 is null or (_col10 is null and (_col7 <> _col4)))} + Statistics: Num rows: 64 Data size: 24440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 64 Data size: 24440 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join MERGEJOIN[26][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product +PREHOOK: query: SELECT * +FROM ( + SELECT test1.key AS key1, test1.value AS value1, test1.col_1 AS col_1, + test2.key AS key2, test2.value AS value2, test2.col_2 AS col_2 + FROM test1 RIGHT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq1 +FULL OUTER JOIN ( + SELECT test1.key AS key3, test1.value AS value3, test1.col_1 AS col_3, + test2.key AS key4, test2.value AS value4, test2.col_2 AS col_4 + FROM test1 LEFT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq2 +ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM ( + SELECT test1.key AS key1, test1.value AS value1, test1.col_1 AS col_1, + test2.key AS key2, test2.value AS value2, test2.col_2 AS col_2 + FROM test1 RIGHT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq1 +FULL OUTER JOIN ( + SELECT test1.key AS key3, test1.value AS value3, test1.col_1 AS col_3, + test2.key AS key4, test2.value AS value4, test2.col_2 AS col_4 + FROM test1 LEFT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq2 +ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +NULL NULL NULL 105 NULL None NULL NULL None NULL NULL NULL +NULL NULL NULL 105 NULL None 98 NULL None NULL NULL NULL +NULL NULL NULL 105 NULL None 99 0 Alice NULL NULL NULL +NULL NULL NULL 105 NULL None 100 1 Bob NULL NULL NULL +NULL NULL NULL 105 NULL None 99 2 Mat 102 2 Del +NULL NULL NULL 105 NULL None 101 2 Car 102 2 Del +NULL NULL NULL 105 NULL None 101 2 Car 103 2 Ema +99 2 Mat 102 2 Del 99 0 Alice NULL NULL NULL +99 2 Mat 102 2 Del 100 1 
Bob NULL NULL NULL +101 2 Car 102 2 Del 99 0 Alice NULL NULL NULL +101 2 Car 102 2 Del 100 1 Bob NULL NULL NULL +101 2 Car 103 2 Ema 99 0 Alice NULL NULL NULL +101 2 Car 103 2 Ema 100 1 Bob NULL NULL NULL +NULL NULL NULL 104 3 Fli NULL NULL None NULL NULL NULL +NULL NULL NULL 104 3 Fli 98 NULL None NULL NULL NULL +NULL NULL NULL 104 3 Fli 99 0 Alice NULL NULL NULL +NULL NULL NULL 104 3 Fli 100 1 Bob NULL NULL NULL +NULL NULL NULL 104 3 Fli 99 2 Mat 102 2 Del +NULL NULL NULL 104 3 Fli 101 2 Car 102 2 Del +NULL NULL NULL 104 3 Fli 101 2 Car 103 2 Ema Warning: Shuffle Join MERGEJOIN[26][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 3' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -2138,7 +2809,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 diff --git ql/src/test/results/clientpositive/llap/join_max_hashtable.q.out ql/src/test/results/clientpositive/llap/join_max_hashtable.q.out index ef1a6f3..f9c376f 100644 --- ql/src/test/results/clientpositive/llap/join_max_hashtable.q.out +++ ql/src/test/results/clientpositive/llap/join_max_hashtable.q.out @@ -224,6 +224,7 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE + DynamicPartitionHashJoin: true HybridGraceHashJoin: true File Output Operator compressed: false @@ -310,6 +311,7 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE + DynamicPartitionHashJoin: true HybridGraceHashJoin: true File Output Operator compressed: false @@ -397,6 +399,7 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE + DynamicPartitionHashJoin: true HybridGraceHashJoin: true File Output Operator compressed: false @@ -483,6 +486,7 @@ STAGE PLANS: input vertices: 1 Map 3 Statistics: Num rows: 809 Data size: 144002 Basic stats: COMPLETE Column stats: COMPLETE + DynamicPartitionHashJoin: true HybridGraceHashJoin: true File Output Operator compressed: false diff --git ql/src/test/results/clientpositive/llap/llap_acid.q.out ql/src/test/results/clientpositive/llap/llap_acid.q.out index dbf531c..e32d3bd 100644 --- ql/src/test/results/clientpositive/llap/llap_acid.q.out +++ ql/src/test/results/clientpositive/llap/llap_acid.q.out @@ -115,10 +115,10 @@ STAGE PLANS: projectedOutputColumnNums: [0, 4, 1] Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4, 0] + keyColumns: 4:smallint, 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:bigint Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map Vectorization: @@ -259,11 +259,11 @@ STAGE PLANS: projectedOutputColumnNums: [5, 2, 3, 4] Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [5] + keyColumns: 5:struct native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [6] - valueColumnNums: [2, 3, 4] + partitionColumns: 6:int + 
valueColumns: 2:float, 3:double, 4:smallint Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map Vectorization: @@ -369,10 +369,10 @@ STAGE PLANS: projectedOutputColumnNums: [0, 4, 1] Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4, 0] + keyColumns: 4:smallint, 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:bigint Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map Vectorization: diff --git ql/src/test/results/clientpositive/llap/llap_acid_fast.q.out ql/src/test/results/clientpositive/llap/llap_acid_fast.q.out index 37c213b..ee48343 100644 --- ql/src/test/results/clientpositive/llap/llap_acid_fast.q.out +++ ql/src/test/results/clientpositive/llap/llap_acid_fast.q.out @@ -109,10 +109,10 @@ STAGE PLANS: projectedOutputColumnNums: [0, 4, 1] Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4, 0] + keyColumns: 4:smallint, 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:bigint Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map Vectorization: @@ -253,11 +253,11 @@ STAGE PLANS: projectedOutputColumnNums: [5, 2, 3, 4] Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [5] + keyColumns: 5:struct native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [6] - valueColumnNums: [2, 3, 4] + partitionColumns: 6:int + valueColumns: 2:float, 3:double, 4:smallint Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map Vectorization: @@ -363,10 +363,10 @@ STAGE PLANS: projectedOutputColumnNums: [0, 4, 1] Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4, 0] + keyColumns: 4:smallint, 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:bigint Execution mode: vectorized, llap LLAP IO: may be used (ACID table) Map Vectorization: diff --git ql/src/test/results/clientpositive/llap/llap_partitioned.q.out ql/src/test/results/clientpositive/llap/llap_partitioned.q.out index 799062e..e80ec79 100644 --- ql/src/test/results/clientpositive/llap/llap_partitioned.q.out +++ ql/src/test/results/clientpositive/llap/llap_partitioned.q.out @@ -1645,13 +1645,15 @@ STAGE PLANS: 0 ctinyint (type: tinyint) 1 ctinyint (type: tinyint) Map Join Vectorization: - bigTableKeyColumnNums: [10] - bigTableRetainedColumnNums: [1, 6, 7, 10] - bigTableValueColumnNums: [1, 6, 7, 10] + bigTableKeyColumns: 10:tinyint + bigTableRetainColumnNums: [1, 6, 7, 10] + bigTableValueColumns: 1:int, 6:char(255), 
7:varchar(255), 10:tinyint className: VectorMapJoinInnerBigOnlyLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [1, 6, 7, 10] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 1:int, 6:char(255), 7:varchar(255), 10:tinyint + hashTableImplementationType: OPTIMIZED outputColumnNames: _col1, _col6, _col7, _col10 input vertices: 1 Map 2 @@ -1706,10 +1708,9 @@ STAGE PLANS: Map-reduce partition columns: ctinyint (type: tinyint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [10] + keyColumns: 10:tinyint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 10 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint) @@ -2115,10 +2116,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 694 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/llap_vector_nohybridgrace.q.out ql/src/test/results/clientpositive/llap/llap_vector_nohybridgrace.q.out index 2c13d5d..86ea9ac 100644 --- ql/src/test/results/clientpositive/llap/llap_vector_nohybridgrace.q.out +++ ql/src/test/results/clientpositive/llap/llap_vector_nohybridgrace.q.out @@ -63,6 +63,7 @@ STAGE PLANS: className: VectorMapJoinInnerBigOnlyLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Fast Hash Table and No Hybrid Hash Join IS true + hashTableImplementationType: FAST input vertices: 1 Map 3 Statistics: Num rows: 25044 Data size: 200352 Basic stats: COMPLETE Column stats: COMPLETE @@ -242,6 +243,7 @@ STAGE PLANS: className: VectorMapJoinInnerBigOnlyLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Fast Hash Table and No Hybrid Hash Join IS true + hashTableImplementationType: FAST input vertices: 1 Map 3 Statistics: Num rows: 25044 Data size: 200352 Basic stats: COMPLETE Column stats: COMPLETE diff --git ql/src/test/results/clientpositive/llap/mapjoin46.q.out ql/src/test/results/clientpositive/llap/mapjoin46.q.out index c6c34be..6779ed9 100644 --- ql/src/test/results/clientpositive/llap/mapjoin46.q.out +++ 
ql/src/test/results/clientpositive/llap/mapjoin46.q.out @@ -1446,7 +1446,122 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +NULL NULL None 102 2 Del +98 NULL None 102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +100 1 Bob 102 2 Del +100 1 Bob 103 2 Ema +100 1 Bob 104 3 Fli +100 1 Bob 105 NULL None +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +101 2 Car 104 3 Fli +101 2 Car 105 NULL None +Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test1.key between 100 and 102 + OR test2.key between 100 and 102) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 380 Basic 
stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 keys: 0 1 @@ -1559,7 +1674,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1615,13 +1730,13 @@ PREHOOK: query: EXPLAIN SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - OR test2.key between 100 and 102) + OR test1.key between 100 and 102) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - OR test2.key between 100 and 102) + OR test1.key between 100 and 102) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1670,12 +1785,12 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)} + residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)} Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false @@ -1695,7 +1810,7 @@ Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reduce PREHOOK: query: SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - OR test2.key between 100 and 102) + OR test1.key between 100 and 102) PREHOOK: type: QUERY PREHOOK: Input: default@test1 PREHOOK: Input: default@test2 @@ -1703,34 +1818,36 @@ PREHOOK: Input: default@test2 POSTHOOK: query: SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - OR test2.key between 100 and 102) + OR test1.key between 100 and 102) POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -NULL NULL None 102 2 Del -98 NULL None 102 2 Del -99 0 Alice 102 2 Del +NULL NULL None NULL NULL NULL +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL 99 2 Mat 102 2 Del 99 2 Mat 103 2 Ema 100 1 Bob 102 2 Del +100 1 Bob 103 2 Ema +100 1 Bob 104 3 Fli +100 1 Bob 105 NULL None 101 2 Car 102 2 Del 101 2 Car 103 2 Ema -NULL NULL NULL 104 3 Fli -NULL NULL NULL 105 NULL None +101 2 Car 104 3 Fli +101 2 Car 105 NULL None +Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: EXPLAIN SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - AND (test1.key between 100 and 102 - OR test2.key between 100 and 102)) + OR test2.key between 100 and 102) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - AND (test1.key between 100 and 102 - OR test2.key between 100 and 102)) + OR test2.key between 100 and 102) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1741,7 +1858,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -1754,11 +1871,9 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: 
Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) + sort order: Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col2 (type: string) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs Map 3 @@ -1771,11 +1886,9 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) + sort order: Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: int), _col2 (type: string) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -1783,16 +1896,16 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col1 (type: int) + 0 + 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 10 Data size: 1711 Basic stats: COMPLETE Column stats: COMPLETE + residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 1711 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1804,11 +1917,11 @@ STAGE PLANS: Processor Tree: ListSink +Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product PREHOOK: query: SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - AND (test1.key between 100 and 102 - OR test2.key between 100 and 102)) + OR test2.key between 100 and 102) PREHOOK: type: QUERY PREHOOK: Input: default@test1 PREHOOK: Input: default@test2 @@ -1816,21 +1929,576 @@ PREHOOK: Input: default@test2 POSTHOOK: query: SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - AND (test1.key between 100 and 102 - OR test2.key between 100 and 102)) + OR test2.key between 100 and 102) POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -NULL NULL NULL 105 NULL None -NULL NULL None NULL NULL NULL -98 NULL None NULL NULL NULL -99 0 Alice NULL NULL NULL -100 1 Bob NULL NULL NULL +NULL NULL None 102 2 Del +98 NULL None 102 2 Del +99 0 Alice 102 2 Del 99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +100 1 Bob 102 2 Del 101 2 Car 102 2 Del 101 2 Car 103 2 Ema NULL NULL NULL 104 3 Fli +NULL NULL NULL 105 NULL None +Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON 
(test1.value=test2.value + OR test2.key between 100 and 102) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +NULL NULL None 102 2 Del +98 NULL None 102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +100 1 Bob 102 2 Del +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +NULL NULL NULL 104 3 Fli +NULL NULL NULL 105 NULL None +PREHOOK: query: EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key 
between 100 and 102)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 10 Data size: 1711 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 1711 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +NULL NULL NULL 105 NULL None +NULL NULL None NULL NULL NULL +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +100 1 Bob NULL NULL NULL +99 2 Mat 102 2 Del +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +NULL NULL NULL 104 3 Fli +PREHOOK: query: EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * +FROM test1 
FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 10 Data size: 1711 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 1711 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +NULL NULL NULL 105 NULL None +NULL NULL None NULL NULL NULL +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +100 1 Bob NULL NULL NULL +99 2 Mat 102 2 Del +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +NULL NULL NULL 104 3 Fli +Warning: Shuffle Join MERGEJOIN[26][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: EXPLAIN 
+SELECT * +FROM ( + SELECT test1.key AS key1, test1.value AS value1, test1.col_1 AS col_1, + test2.key AS key2, test2.value AS value2, test2.col_2 AS col_2 + FROM test1 RIGHT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq1 +FULL OUTER JOIN ( + SELECT test1.key AS key3, test1.value AS value3, test1.col_1 AS col_3, + test2.key AS key4, test2.value AS value4, test2.col_2 AS col_4 + FROM test1 LEFT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq2 +ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * +FROM ( + SELECT test1.key AS key1, test1.value AS value1, test1.col_1 AS col_1, + test2.key AS key2, test2.value AS value2, test2.col_2 AS col_2 + FROM test1 RIGHT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq1 +FULL OUTER JOIN ( + SELECT test1.key AS key3, test1.value AS value3, test1.col_1 AS col_3, + test2.key AS key4, test2.value AS value4, test2.col_2 AS col_4 + FROM test1 LEFT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq2 +ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 4 (BROADCAST_EDGE) + Map 3 <- Map 1 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 3 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: string) + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + input vertices: + 1 Map 4 + residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 3 + Map Operator Tree: + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 
_col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + input vertices: + 0 Map 1 + residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) + Execution mode: llap + LLAP IO: no inputs + Map 4 + Map Operator Tree: + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: string) + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + residual filter predicates: {(_col1 is null or (_col10 is null and (_col7 <> _col4)))} + Statistics: Num rows: 64 Data size: 24440 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 64 Data size: 24440 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join MERGEJOIN[26][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product +PREHOOK: query: SELECT * +FROM ( + SELECT test1.key AS key1, test1.value AS value1, test1.col_1 AS col_1, + test2.key AS key2, test2.value AS value2, test2.col_2 AS col_2 + FROM test1 RIGHT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq1 +FULL OUTER JOIN ( + SELECT test1.key AS key3, test1.value AS value3, test1.col_1 AS col_3, + test2.key AS key4, test2.value AS value4, test2.col_2 AS col_4 + FROM test1 LEFT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq2 +ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM ( + SELECT test1.key AS key1, test1.value AS value1, test1.col_1 AS col_1, + test2.key AS key2, test2.value AS value2, test2.col_2 AS col_2 + FROM test1 RIGHT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq1 +FULL OUTER JOIN ( + SELECT test1.key AS key3, test1.value AS value3, test1.col_1 AS col_3, + test2.key AS key4, test2.value AS value4, test2.col_2 AS col_4 + 
FROM test1 LEFT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq2 +ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +99 2 Mat 102 2 Del 99 0 Alice NULL NULL NULL +99 2 Mat 102 2 Del 100 1 Bob NULL NULL NULL +101 2 Car 102 2 Del 99 0 Alice NULL NULL NULL +101 2 Car 102 2 Del 100 1 Bob NULL NULL NULL +101 2 Car 103 2 Ema 99 0 Alice NULL NULL NULL +101 2 Car 103 2 Ema 100 1 Bob NULL NULL NULL +NULL NULL NULL 104 3 Fli NULL NULL None NULL NULL NULL +NULL NULL NULL 104 3 Fli 98 NULL None NULL NULL NULL +NULL NULL NULL 104 3 Fli 99 0 Alice NULL NULL NULL +NULL NULL NULL 104 3 Fli 99 2 Mat 102 2 Del +NULL NULL NULL 104 3 Fli 100 1 Bob NULL NULL NULL +NULL NULL NULL 104 3 Fli 101 2 Car 102 2 Del +NULL NULL NULL 104 3 Fli 101 2 Car 103 2 Ema +NULL NULL NULL 105 NULL None NULL NULL None NULL NULL NULL +NULL NULL NULL 105 NULL None 98 NULL None NULL NULL NULL +NULL NULL NULL 105 NULL None 99 0 Alice NULL NULL NULL +NULL NULL NULL 105 NULL None 99 2 Mat 102 2 Del +NULL NULL NULL 105 NULL None 100 1 Bob NULL NULL NULL +NULL NULL NULL 105 NULL None 101 2 Car 102 2 Del +NULL NULL NULL 105 NULL None 101 2 Car 103 2 Ema Warning: Shuffle Join MERGEJOIN[26][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Reducer 2' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -1966,7 +2634,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 diff --git ql/src/test/results/clientpositive/llap/mergejoin.q.out ql/src/test/results/clientpositive/llap/mergejoin.q.out index 96be039..ddabfcd 100644 --- ql/src/test/results/clientpositive/llap/mergejoin.q.out +++ ql/src/test/results/clientpositive/llap/mergejoin.q.out @@ -52,10 +52,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:string Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: vectorized, llap @@ -106,10 +106,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:string Statistics: Num rows: 25 Data size: 4375 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Select Operator @@ -136,10 +136,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No 
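The plan above is the interesting part of this golden file: the outer FULL OUTER JOIN's ON clause is a disjunction, so no equi-join key can be extracted. The Merge Join therefore runs with empty key lists — a cross product, hence the repeated warning — and the whole predicate is evaluated as residual filter predicates. A minimal sketch that should produce the same plan shape (table and column names here are hypothetical, not from this patch):

create table t_left (k int, v int);
create table t_right (k int, v int);

-- No usable join key: the FULL OUTER JOIN becomes a cross-product
-- Merge Join with the ON clause kept as a residual filter predicate.
explain
select *
from t_left l
full outer join t_right r
on (l.v is null or r.v is null);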
DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2] + valueColumns: 0:string, 1:string, 2:binary Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: vectorized, llap @@ -178,6 +177,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: @@ -210,10 +212,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2] + valueColumns: 0:string, 1:string, 2:binary Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) @@ -435,10 +436,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -488,10 +488,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -530,6 +529,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -1610,10 +1612,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column 
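The long runs of keyColumnNums/valueColumnNums hunks that follow are cosmetic: the vectorization detail output now prints each column as num:type (for example keyColumns: 0:string) instead of a bare index list, and empty lists such as keyColumnNums: [] are simply dropped. A sketch of the kind of query that surfaces these Reduce Sink Vectorization entries, assuming the usual qtest vectorization settings:

set hive.vectorized.execution.enabled=true;
set hive.vectorized.execution.reducesink.new.enabled=true;

-- The reduce sink over src.key now reports keyColumns: 0:string
-- rather than keyColumnNums: [0].
explain vectorization detail
select s1.key, s2.value
from src s1
join src s2 on s1.key = s2.key;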
stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1655,10 +1656,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1697,6 +1697,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -1806,10 +1809,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1851,10 +1853,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1893,6 +1894,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -2002,10 +2006,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -2047,10 +2050,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN 
[tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -2075,7 +2077,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) @@ -2089,6 +2091,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -2209,10 +2214,10 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [1] + keyColumns: 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:int Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: vectorized, llap @@ -2263,10 +2268,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [1] + keyColumns: 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 25 Data size: 2225 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string) @@ -2292,10 +2296,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2] + valueColumns: 0:string, 1:string, 2:binary Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: vectorized, llap @@ -2346,10 +2349,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -2398,6 +2400,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: 
COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -2417,6 +2422,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: @@ -2487,10 +2495,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2] + valueColumns: 0:int, 1:int, 2:binary Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Reducer 7 @@ -2525,10 +2532,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2] + valueColumns: 0:string, 1:string, 2:binary Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) @@ -2608,10 +2614,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [1] + keyColumns: 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 242 Data size: 22022 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -2661,10 +2666,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [1] + keyColumns: 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -2703,6 +2707,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -2824,10 +2831,9 @@ STAGE PLANS: Map-reduce partition 
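Every reduce-side Merge Join in these files also gains a MergeJoin Vectorization block reporting enabled: false, because shuffle merge joins are not vectorizable yet (enableConditionsNotMet: Vectorizing MergeJoin Supported IS false). Forcing a join past MapJoin conversion makes the new block appear; a sketch with assumed settings:

set hive.auto.convert.join=false;
set hive.vectorized.execution.enabled=true;

-- Stays a shuffle Merge Join in the reducer, so the plan now carries
-- an explicit MergeJoin Vectorization: enabled: false section.
explain vectorization detail
select count(*)
from src a
join src b on a.key = b.key;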
columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -2877,10 +2883,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -2930,10 +2935,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 624 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -2983,10 +2987,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -3022,6 +3025,9 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 624 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -3041,6 +3047,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 5 Execution mode: vectorized, llap Reduce Vectorization: @@ -3141,10 +3150,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [1] + keyColumns: 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 242 Data size: 22022 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -3194,10 +3202,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [1] + keyColumns: 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 500 Data size: 45500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -3236,6 +3243,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -3354,10 +3364,10 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [1] + keyColumns: 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:int Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: vectorized, llap @@ -3408,10 +3418,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [1] + keyColumns: 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 25 Data size: 2225 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string) @@ -3437,10 +3446,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2] + valueColumns: 0:string, 1:string, 2:binary Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) Execution mode: vectorized, llap @@ -3491,10 +3499,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for 
keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -3543,6 +3550,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -3562,6 +3572,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: @@ -3632,10 +3645,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2] + valueColumns: 0:int, 1:int, 2:binary Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: binary) Reducer 7 @@ -3670,10 +3682,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2] + valueColumns: 0:string, 1:string, 2:binary Statistics: Num rows: 1 Data size: 552 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: string), _col2 (type: binary) @@ -3761,10 +3772,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -3814,10 +3824,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -3867,10 +3876,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink 
Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 624 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -3920,10 +3928,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -3959,6 +3966,9 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 624 Data size: 2496 Basic stats: COMPLETE Column stats: COMPLETE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -3978,6 +3988,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 5 Execution mode: vectorized, llap Reduce Vectorization: @@ -4091,10 +4104,9 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:int, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -4143,10 +4155,9 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:int, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 500 Data size: 47500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -4196,10 +4207,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 
242 Data size: 968 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: llap @@ -4220,6 +4230,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: @@ -4288,10 +4301,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 500 Data size: 2000 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 diff --git ql/src/test/results/clientpositive/llap/partialdhj.q.out ql/src/test/results/clientpositive/llap/partialdhj.q.out index 4e62c4f..460c02d 100644 --- ql/src/test/results/clientpositive/llap/partialdhj.q.out +++ ql/src/test/results/clientpositive/llap/partialdhj.q.out @@ -107,6 +107,7 @@ STAGE PLANS: input vertices: 1 Map 4 Statistics: Num rows: 32 Data size: 2848 Basic stats: COMPLETE Column stats: COMPLETE + DynamicPartitionHashJoin: true HybridGraceHashJoin: true Group By Operator keys: _col0 (type: string) @@ -144,6 +145,7 @@ STAGE PLANS: input vertices: 0 Reducer 3 Statistics: Num rows: 25 Data size: 6675 Basic stats: COMPLETE Column stats: COMPLETE + DynamicPartitionHashJoin: true HybridGraceHashJoin: true File Output Operator compressed: false @@ -337,6 +339,7 @@ STAGE PLANS: input vertices: 1 Reducer 5 Statistics: Num rows: 25 Data size: 6675 Basic stats: COMPLETE Column stats: COMPLETE + DynamicPartitionHashJoin: true HybridGraceHashJoin: true File Output Operator compressed: false @@ -358,6 +361,7 @@ STAGE PLANS: input vertices: 1 Map 6 Statistics: Num rows: 32 Data size: 2848 Basic stats: COMPLETE Column stats: COMPLETE + DynamicPartitionHashJoin: true HybridGraceHashJoin: true Group By Operator keys: _col0 (type: string) diff --git ql/src/test/results/clientpositive/llap/semijoin.q.out ql/src/test/results/clientpositive/llap/semijoin.q.out index 82cee33..9711ab2 100644 --- ql/src/test/results/clientpositive/llap/semijoin.q.out +++ ql/src/test/results/clientpositive/llap/semijoin.q.out @@ -1968,7 +1968,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 Left Semi Join 1 to 2 keys: 0 key (type: int) @@ -2394,7 +2394,7 @@ STAGE PLANS: Merge Join Operator condition map: Left Semi Join 0 to 1 - Outer Join 0 to 2 + Full Outer Join 0 to 2 keys: 0 key (type: int) 1 _col0 (type: int) diff --git ql/src/test/results/clientpositive/llap/smb_mapjoin_4.q.out ql/src/test/results/clientpositive/llap/smb_mapjoin_4.q.out index 89bdfd8..66bf118 100644 --- ql/src/test/results/clientpositive/llap/smb_mapjoin_4.q.out +++ ql/src/test/results/clientpositive/llap/smb_mapjoin_4.q.out @@ -582,7 +582,7 @@ STAGE PLANS: Merge Join Operator condition map: Left Outer Join 0 to 1 - Outer Join 1 to 2 + Full Outer Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) @@ -1046,7 +1046,7 @@ STAGE PLANS: Merge Join Operator condition map: Right Outer Join 0 to 1 - Outer Join 1 to 2 + Full Outer Join 1 to 2 keys: 0 _col0 (type: int) 1 
_col0 (type: int) @@ -1280,7 +1280,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 Left Outer Join 1 to 2 keys: 0 _col0 (type: int) @@ -1397,7 +1397,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 Right Outer Join 1 to 2 keys: 0 _col0 (type: int) @@ -1511,8 +1511,8 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 - Outer Join 1 to 2 + Full Outer Join 0 to 1 + Full Outer Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) diff --git ql/src/test/results/clientpositive/llap/smb_mapjoin_5.q.out ql/src/test/results/clientpositive/llap/smb_mapjoin_5.q.out index 06e4173..ec40621 100644 --- ql/src/test/results/clientpositive/llap/smb_mapjoin_5.q.out +++ ql/src/test/results/clientpositive/llap/smb_mapjoin_5.q.out @@ -582,7 +582,7 @@ STAGE PLANS: Merge Join Operator condition map: Left Outer Join 0 to 1 - Outer Join 1 to 2 + Full Outer Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) @@ -1046,7 +1046,7 @@ STAGE PLANS: Merge Join Operator condition map: Right Outer Join 0 to 1 - Outer Join 1 to 2 + Full Outer Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) @@ -1280,7 +1280,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 Left Outer Join 1 to 2 keys: 0 _col0 (type: int) @@ -1397,7 +1397,7 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 Right Outer Join 1 to 2 keys: 0 _col0 (type: int) @@ -1511,8 +1511,8 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 - Outer Join 1 to 2 + Full Outer Join 0 to 1 + Full Outer Join 1 to 2 keys: 0 _col0 (type: int) 1 _col0 (type: int) diff --git ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_1.q.out ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_1.q.out index b63b25f..7492f64 100644 --- ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_1.q.out +++ ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_1.q.out @@ -491,6 +491,7 @@ STAGE PLANS: input vertices: 1 Map 4 Statistics: Num rows: 1501 Data size: 322826 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true HybridGraceHashJoin: true Reduce Output Operator key expressions: _col2 (type: int) @@ -623,6 +624,7 @@ STAGE PLANS: input vertices: 1 Map 4 Statistics: Num rows: 1501 Data size: 322826 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true HybridGraceHashJoin: true Group By Operator aggregations: count() @@ -756,6 +758,7 @@ STAGE PLANS: input vertices: 1 Map 5 Statistics: Num rows: 1501 Data size: 322826 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true HybridGraceHashJoin: true Group By Operator aggregations: count() diff --git ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_3.q.out ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_3.q.out index 4f557d3..2abe505 100644 --- ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_3.q.out +++ ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_3.q.out @@ -204,3 +204,99 @@ STAGE PLANS: Processor Tree: ListSink +PREHOOK: query: explain +select a.* +from alltypesorc a left outer join src b +on a.cint = cast(b.key as int) +limit 1 +PREHOOK: type: QUERY +POSTHOOK: query: explain +select a.* +from alltypesorc a left outer join src b +on 
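The smb_mapjoin_4/5 and semijoin hunks are a naming cleanup rather than a plan change: a FULL OUTER JOIN's entry in the condition map used to print as plain "Outer Join n to m" and now prints as "Full Outer Join n to m", keeping it unambiguous next to the Left/Right variants in multi-way joins. Something like the following (tables hypothetical) yields the mixed condition map seen above:

-- Condition map now reads:
--   Left Outer Join 0 to 1
--   Full Outer Join 1 to 2
explain
select *
from t1
left outer join t2 on t1.k = t2.k
full outer join t3 on t2.k = t3.k;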
a.cint = cast(b.key as int) +limit 1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 3 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE + TopN Hash Memory Usage: 0.1 + value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) + Execution mode: llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: UDFToInteger(_col0) (type: int) + sort order: + + Map-reduce partition columns: UDFToInteger(_col0) (type: int) + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: llap + LLAP IO: no inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: tinyint), VALUE._col1 (type: smallint), VALUE._col2 (type: int), VALUE._col3 (type: bigint), VALUE._col4 (type: float), VALUE._col5 (type: double), VALUE._col6 (type: string), VALUE._col7 (type: string), VALUE._col8 (type: timestamp), VALUE._col9 (type: timestamp), VALUE._col10 (type: boolean), VALUE._col11 (type: boolean) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col2 (type: int) + 1 UDFToInteger(_col0) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + input vertices: + 1 Map 3 + Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column 
stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + diff --git ql/src/test/results/clientpositive/llap/tez_vector_dynpart_hashjoin_1.q.out ql/src/test/results/clientpositive/llap/tez_vector_dynpart_hashjoin_1.q.out index d0a5e62..d853612 100644 --- ql/src/test/results/clientpositive/llap/tez_vector_dynpart_hashjoin_1.q.out +++ ql/src/test/results/clientpositive/llap/tez_vector_dynpart_hashjoin_1.q.out @@ -491,6 +491,7 @@ STAGE PLANS: input vertices: 1 Map 4 Statistics: Num rows: 1501 Data size: 322826 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true HybridGraceHashJoin: true Reduce Output Operator key expressions: _col2 (type: int) @@ -623,6 +624,7 @@ STAGE PLANS: input vertices: 1 Map 4 Statistics: Num rows: 1501 Data size: 322826 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true HybridGraceHashJoin: true Group By Operator aggregations: count() @@ -756,6 +758,7 @@ STAGE PLANS: input vertices: 1 Map 5 Statistics: Num rows: 1501 Data size: 322826 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true HybridGraceHashJoin: true Group By Operator aggregations: count() diff --git ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out index 5d4bfe7..0fec415 100644 --- ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out +++ ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out @@ -155,10 +155,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3] + valueColumns: 0:decimal(38,18), 1:decimal(38,18), 2:decimal(38,18), 3:struct Statistics: Num rows: 1 Data size: 736 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(38,18)), _col1 (type: decimal(38,18)), _col2 (type: decimal(38,18)), _col3 (type: struct) Execution mode: vectorized, llap @@ -286,10 +285,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3] + valueColumns: 0:double, 1:double, 2:double, 3:struct Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: struct) Execution mode: vectorized, llap @@ -417,10 +415,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
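partialdhj.q.out and the tez_dynpart_hashjoin outputs introduce the other new annotation: a Map Join operator executed as a dynamically partitioned hash join now prints DynamicPartitionHashJoin: true next to the existing HybridGraceHashJoin: true. The switch behind it is hive.optimize.dynamic.partition.hashjoin; a sketch (the exact qtest settings may differ):

set hive.execution.engine=tez;
set hive.auto.convert.join=true;
set hive.optimize.dynamic.partition.hashjoin=true;

-- The big-table side arrives over a CUSTOM_SIMPLE_EDGE and the
-- Map Join is flagged DynamicPartitionHashJoin: true.
explain
select count(*)
from alltypesorc a
join src b on a.cint = cast(b.key as int);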
LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3] + valueColumns: 0:timestamp, 1:timestamp, 2:double, 3:struct Statistics: Num rows: 1 Data size: 240 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: double), _col3 (type: struct) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out index c99ac8d..0c89015 100644 --- ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out +++ ql/src/test/results/clientpositive/llap/vector_aggregate_without_gby.q.out @@ -94,10 +94,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1] + valueColumns: 0:int, 1:string Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: string) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out index 54216fa..43db2ed 100644 --- ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out +++ ql/src/test/results/clientpositive/llap/vector_auto_smb_mapjoin_14.q.out @@ -104,6 +104,9 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: llap + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -236,6 +239,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: llap + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -442,6 +448,9 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: llap + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Map 5 Map Operator Tree: TableScan @@ -486,6 +495,9 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: llap + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -541,6 +553,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 6 Execution mode: vectorized, llap Reduce Vectorization: @@ -695,6 +710,9 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE 
Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: llap + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -832,6 +850,9 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: llap + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -993,6 +1014,9 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: llap + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -1142,6 +1166,9 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: llap + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -1335,6 +1362,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -1458,6 +1488,9 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: llap + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -1600,6 +1633,9 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: llap + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -1745,6 +1781,9 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: llap + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -1917,6 +1956,9 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: default.dest2 Execution mode: llap + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Stage: Stage-3 Dependency Collection @@ -2140,6 +2182,9 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: llap + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: diff --git ql/src/test/results/clientpositive/llap/vector_between_columns.q.out ql/src/test/results/clientpositive/llap/vector_between_columns.q.out index c85c59e..d4de843 100644 --- 
ql/src/test/results/clientpositive/llap/vector_between_columns.q.out +++ ql/src/test/results/clientpositive/llap/vector_between_columns.q.out @@ -196,6 +196,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Stage: Stage-0 Fetch Operator @@ -368,6 +371,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Stage: Stage-0 Fetch Operator diff --git ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out index aabfc73..1ab9448 100644 --- ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out +++ ql/src/test/results/clientpositive/llap/vector_binary_join_groupby.q.out @@ -155,6 +155,7 @@ STAGE PLANS: className: VectorMapJoinInnerStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 input vertices: 1 Map 4 @@ -567,6 +568,7 @@ STAGE PLANS: className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col3 input vertices: 1 Map 2 diff --git ql/src/test/results/clientpositive/llap/vector_case_when_1.q.out ql/src/test/results/clientpositive/llap/vector_case_when_1.q.out index 7dde2ec..128fe4f 100644 --- ql/src/test/results/clientpositive/llap/vector_case_when_1.q.out +++ ql/src/test/results/clientpositive/llap/vector_case_when_1.q.out @@ -575,10 +575,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4] + keyColumns: 4:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [22, 24, 25, 26, 27, 28, 30, 31, 32, 33, 34, 35, 38, 40, 43, 44] + valueColumns: 22:string, 24:string, 25:string, 26:date, 27:double, 28:double, 30:decimal(10,2), 31:decimal(10,2), 32:decimal(12,2), 33:decimal(12,2), 34:decimal(10,2), 35:decimal(10,2), 38:timestamp, 40:int, 43:int, 44:date Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), 
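vector_binary_join_groupby.q.out and vector_char_mapjoin1.q.out pick up a third annotation: native vectorized MapJoins now record which hash table implementation they bind to, printing hashTableImplementationType: OPTIMIZED when hive.mapjoin.optimized.hashtable is in effect (the same condition already listed under nativeConditionsMet). A sketch with assumed settings:

set hive.mapjoin.optimized.hashtable=true;
set hive.vectorized.execution.mapjoin.native.enabled=true;

-- The Map Join Vectorization section now ends with
-- hashTableImplementationType: OPTIMIZED.
explain vectorization detail
select a.*
from src a
join src b on a.key = b.key;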
_col3 (type: string), _col4 (type: date), _col5 (type: double), _col6 (type: double), _col7 (type: decimal(10,2)), _col8 (type: decimal(10,2)), _col9 (type: decimal(12,2)), _col10 (type: decimal(12,2)), _col11 (type: decimal(10,2)), _col12 (type: decimal(10,2)), _col13 (type: timestamp), _col14 (type: int), _col15 (type: int), _col16 (type: date) Execution mode: vectorized, llap @@ -950,10 +950,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4] + keyColumns: 4:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [27, 38, 48, 52, 54, 60, 63, 65, 67, 68, 69, 70, 73, 76, 79, 80] + valueColumns: 27:string, 38:string, 48:string, 52:date, 54:double, 60:double, 63:decimal(10,2), 65:decimal(10,2), 67:decimal(12,2), 68:decimal(12,2), 69:decimal(10,2), 70:decimal(10,2), 73:timestamp, 76:int, 79:int, 80:date Statistics: Num rows: 101 Data size: 57327 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: date), _col5 (type: double), _col6 (type: double), _col7 (type: decimal(10,2)), _col8 (type: decimal(10,2)), _col9 (type: decimal(12,2)), _col10 (type: decimal(12,2)), _col11 (type: decimal(10,2)), _col12 (type: decimal(10,2)), _col13 (type: timestamp), _col14 (type: int), _col15 (type: int), _col16 (type: date) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out index d8ab3c5..4cdf6ea 100644 --- ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out @@ -415,10 +415,10 @@ STAGE PLANS: sort order: +++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 2, 3] + keyColumns: 1:timestamp, 2:string, 3:timestamp native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [10, 12, 13, 14, 11, 7, 16, 23] + valueColumns: 10:string, 12:string, 13:string, 14:int, 11:string, 7:int, 16:int, 23:date Statistics: Num rows: 51 Data size: 16000 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int), _col8 (type: int), _col9 (type: date) Execution mode: vectorized, llap @@ -700,10 +700,10 @@ STAGE PLANS: sort order: +++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 2, 3] + keyColumns: 1:timestamp, 2:string, 3:timestamp native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [15, 26, 36, 40, 42, 44, 46, 53] + valueColumns: 15:string, 26:string, 36:string, 40:int, 42:string, 44:int, 46:int, 53:date Statistics: Num rows: 51 Data size: 16000 Basic stats: COMPLETE Column stats: 
NONE value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int), _col8 (type: int), _col9 (type: date) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_char_mapjoin1.q.out ql/src/test/results/clientpositive/llap/vector_char_mapjoin1.q.out index 72cd1d3..a1e61f7 100644 --- ql/src/test/results/clientpositive/llap/vector_char_mapjoin1.q.out +++ ql/src/test/results/clientpositive/llap/vector_char_mapjoin1.q.out @@ -177,6 +177,7 @@ STAGE PLANS: className: VectorMapJoinInnerStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 3 @@ -387,6 +388,7 @@ STAGE PLANS: className: VectorMapJoinInnerStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 0 Map 1 @@ -557,6 +559,7 @@ STAGE PLANS: className: VectorMapJoinInnerStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 0 Map 1 diff --git ql/src/test/results/clientpositive/llap/vector_coalesce_3.q.out ql/src/test/results/clientpositive/llap/vector_coalesce_3.q.out index 5279e77..c447748 100644 --- ql/src/test/results/clientpositive/llap/vector_coalesce_3.q.out +++ ql/src/test/results/clientpositive/llap/vector_coalesce_3.q.out @@ -99,14 +99,15 @@ STAGE PLANS: 0 _col0 (type: bigint) 1 _col0 (type: bigint) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0] - bigTableValueColumnNums: [0] + bigTableKeyColumns: 0:bigint + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:bigint className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 2] - smallTableMapping: [2] + projectedOutput: 0:bigint, 2:bigint + smallTableValueMapping: 2:bigint + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col2 input vertices: 1 Map 2 @@ -169,10 +170,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:bigint Statistics: Num rows: 3 Data size: 48 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_coalesce_4.q.out ql/src/test/results/clientpositive/llap/vector_coalesce_4.q.out index 409c68c..194929d 100644 --- ql/src/test/results/clientpositive/llap/vector_coalesce_4.q.out +++ ql/src/test/results/clientpositive/llap/vector_coalesce_4.q.out @@ -70,10 +70,10 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:int, 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:int Statistics: Num rows: 2 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_complex_all.q.out ql/src/test/results/clientpositive/llap/vector_complex_all.q.out index f2277c1..dc55271 100644 --- ql/src/test/results/clientpositive/llap/vector_complex_all.q.out +++ ql/src/test/results/clientpositive/llap/vector_complex_all.q.out @@ -690,10 +690,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3] + valueColumns: 0:string, 1:map, 2:array, 3:struct Statistics: Num rows: 1 Data size: 3440 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string), _col1 (type: map), _col2 (type: array), _col3 (type: struct) Execution mode: vectorized, llap @@ -730,10 +729,8 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap Map Vectorization: @@ -769,10 +766,8 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap Map Vectorization: @@ -810,10 +805,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: 
VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:string Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Execution mode: vectorized, llap @@ -859,6 +853,9 @@ STAGE PLANS: output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat serde: org.apache.hadoop.hive.ql.io.orc.OrcSerde name: default.orc_create_complex + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Stage: Stage-2 Dependency Collection @@ -949,10 +946,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap @@ -1174,10 +1170,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:bigint Statistics: Num rows: 13503 Data size: 4721072 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap @@ -1317,10 +1313,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:bigint Statistics: Num rows: 13503 Data size: 7697400 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_complex_join.q.out ql/src/test/results/clientpositive/llap/vector_complex_join.q.out index 98e7dc0..a7d0c32 100644 --- ql/src/test/results/clientpositive/llap/vector_complex_join.q.out +++ ql/src/test/results/clientpositive/llap/vector_complex_join.q.out @@ -72,6 +72,7 @@ STAGE PLANS: className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: 
OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 input vertices: 1 Map 2 @@ -251,6 +252,7 @@ STAGE PLANS: className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2 input vertices: 1 Map 2 @@ -394,6 +396,7 @@ STAGE PLANS: className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2 input vertices: 1 Map 2 diff --git ql/src/test/results/clientpositive/llap/vector_date_1.q.out ql/src/test/results/clientpositive/llap/vector_date_1.q.out index 2a77c39..38a4c79 100644 --- ql/src/test/results/clientpositive/llap/vector_date_1.q.out +++ ql/src/test/results/clientpositive/llap/vector_date_1.q.out @@ -128,10 +128,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:date native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 3, 4, 5, 6, 7, 8, 9, 10] + valueColumns: 1:date, 3:boolean, 4:boolean, 5:boolean, 6:boolean, 7:boolean, 8:boolean, 9:boolean, 10:boolean Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: date), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean) Execution mode: vectorized, llap @@ -293,10 +293,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:date native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 3, 4, 5, 6, 7, 8, 9, 10] + valueColumns: 1:date, 3:boolean, 4:boolean, 5:boolean, 6:boolean, 7:boolean, 8:boolean, 9:boolean, 10:boolean Statistics: Num rows: 3 Data size: 336 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: date), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean), _col6 (type: boolean), _col7 (type: boolean), _col8 (type: boolean), _col9 (type: boolean) Execution mode: vectorized, llap @@ -458,10 +458,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:date native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3, 4, 5, 6, 7] + valueColumns: 3:boolean, 4:boolean, 5:boolean, 6:boolean, 7:boolean Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean) Execution mode: vectorized, llap @@ -623,10 +623,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:date native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3, 4, 5, 6, 7] + valueColumns: 3:boolean, 4:boolean, 5:boolean, 6:boolean, 7:boolean Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: boolean), _col2 (type: boolean), _col3 (type: boolean), _col4 (type: boolean), _col5 (type: boolean) Execution mode: vectorized, llap @@ -792,10 +792,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:date native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:date Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: date) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out index 5107015..96fba4b 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_1.q.out @@ -87,10 +87,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4] + keyColumns: 4:boolean native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -204,10 +203,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4] + keyColumns: 4:tinyint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -321,10 +319,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4] + 
keyColumns: 4:smallint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -438,10 +435,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4] + keyColumns: 4:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -555,10 +551,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4] + keyColumns: 4:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -672,10 +667,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4] + keyColumns: 4:float native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -789,10 +783,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4] + keyColumns: 4:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -906,10 +899,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4] + keyColumns: 4:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1023,10 +1015,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4] + keyColumns: 
4:timestamp native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs diff --git ql/src/test/results/clientpositive/llap/vector_decimal_10_0.q.out ql/src/test/results/clientpositive/llap/vector_decimal_10_0.q.out index 5e835cd..a97329e 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_10_0.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_10_0.q.out @@ -76,10 +76,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:decimal(10,0) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 224 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -192,10 +191,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:decimal(10,0) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs diff --git ql/src/test/results/clientpositive/llap/vector_decimal_2.q.out ql/src/test/results/clientpositive/llap/vector_decimal_2.q.out index bc596b3..45b3dc3 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_2.q.out @@ -65,10 +65,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2] + keyColumns: 2:boolean native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -181,10 +180,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2] + keyColumns: 2:tinyint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -297,10 +295,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2] + 
keyColumns: 2:smallint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -413,10 +410,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2] + keyColumns: 2:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -529,10 +525,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2] + keyColumns: 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -645,10 +640,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2] + keyColumns: 2:float native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -761,10 +755,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2] + keyColumns: 2:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -877,10 +870,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2] + keyColumns: 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1004,10 +996,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2] + keyColumns: 2:boolean 
native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1120,10 +1111,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2] + keyColumns: 2:tinyint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1236,10 +1226,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2] + keyColumns: 2:smallint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1352,10 +1341,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2] + keyColumns: 2:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1468,10 +1456,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2] + keyColumns: 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1584,10 +1571,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2] + keyColumns: 2:float native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1700,10 +1686,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2] + keyColumns: 2:double native: true 
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1816,10 +1801,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2] + keyColumns: 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs diff --git ql/src/test/results/clientpositive/llap/vector_decimal_6.q.out ql/src/test/results/clientpositive/llap/vector_decimal_6.q.out index 800a4ae..92874b3 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_6.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_6.q.out @@ -149,10 +149,9 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:decimal(10,5), 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 27 Data size: 2684 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -290,10 +289,9 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:decimal(17,4), 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 27 Data size: 3132 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -442,10 +440,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [3] + keyColumns: 3:decimal(18,5) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 54 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -486,10 +483,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [3] + keyColumns: 3:decimal(18,5) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS 
true - valueColumnNums: [] Statistics: Num rows: 54 Data size: 5600 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -670,10 +666,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4] + keyColumns: 4:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:decimal(11,5) Statistics: Num rows: 27 Data size: 3132 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: decimal(11,5)) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out index 4bb8a01..894fcf8 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_aggregate.q.out @@ -100,10 +100,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3, 4, 5, 6, 7, 8, 9] + valueColumns: 1:bigint, 2:decimal(20,10), 3:decimal(20,10), 4:decimal(30,10), 5:bigint, 6:decimal(23,14), 7:decimal(23,14), 8:decimal(33,14), 9:bigint Statistics: Num rows: 12289 Data size: 2662128 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: bigint), _col6 (type: decimal(23,14)), _col7 (type: decimal(23,14)), _col8 (type: decimal(33,14)), _col9 (type: bigint) Execution mode: vectorized, llap @@ -279,10 +279,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] + valueColumns: 1:bigint, 2:decimal(20,10), 3:decimal(20,10), 4:decimal(30,10), 5:struct, 6:struct, 7:struct, 8:bigint, 9:decimal(23,14), 10:decimal(23,14), 11:decimal(33,14), 12:struct, 13:struct, 14:struct, 15:bigint Statistics: Num rows: 12289 Data size: 2662128 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: bigint), _col9 (type: decimal(23,14)), _col10 (type: decimal(23,14)), _col11 (type: decimal(33,14)), _col12 (type: struct), _col13 (type: struct), _col14 (type: struct), _col15 (type: bigint) Execution mode: vectorized, llap @@ -465,10 +465,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce 
Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3, 4, 5, 6, 7, 8, 9] + valueColumns: 1:bigint, 2:decimal(11,5), 3:decimal(11,5), 4:decimal(21,5), 5:bigint, 6:decimal(16,0), 7:decimal(16,0), 8:decimal(26,0), 9:bigint Statistics: Num rows: 12289 Data size: 2662128 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(11,5)), _col3 (type: decimal(11,5)), _col4 (type: decimal(21,5)), _col5 (type: bigint), _col6 (type: decimal(16,0)), _col7 (type: decimal(16,0)), _col8 (type: decimal(26,0)), _col9 (type: bigint) Execution mode: vectorized, llap @@ -664,10 +664,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] + valueColumns: 1:bigint, 2:decimal(11,5), 3:decimal(11,5), 4:decimal(21,5), 5:struct, 6:struct, 7:struct, 8:bigint, 9:decimal(16,0), 10:decimal(16,0), 11:decimal(26,0), 12:struct, 13:struct, 14:struct, 15:bigint Statistics: Num rows: 12289 Data size: 2662128 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(11,5)), _col3 (type: decimal(11,5)), _col4 (type: decimal(21,5)), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: bigint), _col9 (type: decimal(16,0)), _col10 (type: decimal(16,0)), _col11 (type: decimal(26,0)), _col12 (type: struct), _col13 (type: struct), _col14 (type: struct), _col15 (type: bigint) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_decimal_expressions.q.out ql/src/test/results/clientpositive/llap/vector_decimal_expressions.q.out index 64433ea..d5a9173 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_expressions.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_expressions.q.out @@ -83,10 +83,9 @@ STAGE PLANS: sort order: ++++++++++++++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4, 6, 8, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] + keyColumns: 4:decimal(25,14), 6:decimal(26,14), 8:decimal(38,13), 10:decimal(38,17), 11:decimal(12,10), 12:int, 13:smallint, 14:tinyint, 15:bigint, 16:boolean, 17:double, 18:float, 19:string, 20:timestamp native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 455 Data size: 100294 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap @@ -256,10 +255,9 @@ STAGE PLANS: sort order: ++++++++++++++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4, 6, 8, 10, 11, 
12, 13, 14, 15, 16, 17, 18, 19, 20] + keyColumns: 4:decimal(11,3), 6:decimal(11,3), 8:decimal(21,11), 10:decimal(23,9), 11:decimal(5,3), 12:int, 13:smallint, 14:tinyint, 15:bigint, 16:boolean, 17:double, 18:float, 19:string, 20:timestamp native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 455 Data size: 100294 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out index 87fc687..28b1740 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_mapjoin.q.out @@ -193,10 +193,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(26,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(26,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 997 Data size: 106235 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -457,10 +456,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(26,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(26,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:decimal(24,0) Statistics: Num rows: 997 Data size: 212470 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(24,0)) Execution mode: vectorized, llap @@ -799,10 +798,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(16,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(16,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 997 Data size: 106235 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs @@ -1065,10 +1063,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(16,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(16,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:decimal(14,0) Statistics: Num rows: 997 Data size: 
212470 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(14,0)) Execution mode: vectorized, llap @@ -1332,10 +1330,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(16,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(16,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 997 Data size: 106235 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs @@ -1598,10 +1595,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(16,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(16,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:decimal(14,0) Statistics: Num rows: 997 Data size: 212470 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(14,0)) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out index 18b903b..88a0444 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_precision.q.out @@ -601,10 +601,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1] + valueColumns: 0:struct, 1:decimal(30,10) Statistics: Num rows: 1 Data size: 400 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: struct), _col1 (type: decimal(30,10)) Execution mode: vectorized, llap @@ -1211,10 +1210,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1] + valueColumns: 0:struct, 1:decimal(30,10) Statistics: Num rows: 1 Data size: 512 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: decimal(30,10)) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_decimal_round.q.out ql/src/test/results/clientpositive/llap/vector_decimal_round.q.out index 6737052..7bdf8c8 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_round.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_round.q.out @@ -74,10 +74,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: 
VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:decimal(10,0) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2] + valueColumns: 2:decimal(11,0) Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(11,0)) Execution mode: vectorized, llap @@ -192,10 +192,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2] + keyColumns: 2:decimal(11,0) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:decimal(10,0) Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: decimal(10,0)) Execution mode: vectorized, llap @@ -338,10 +338,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:decimal(10,0) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2] + valueColumns: 2:decimal(11,0) Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(11,0)) Execution mode: vectorized, llap @@ -455,10 +455,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2] + keyColumns: 2:decimal(11,0) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:decimal(10,0) Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: decimal(10,0)) Execution mode: vectorized, llap @@ -600,10 +600,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:decimal(10,0) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2] + valueColumns: 2:decimal(11,0) Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(11,0)) Execution mode: vectorized, llap @@ -717,10 +717,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2] + keyColumns: 2:decimal(11,0) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT 
columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:decimal(10,0) Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: decimal(10,0)) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out index e3d4f40..3ac4166 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_round_2.q.out @@ -78,10 +78,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2] + keyColumns: 2:decimal(21,0) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14] + valueColumns: 3:decimal(21,0), 4:decimal(22,1), 5:decimal(23,2), 6:decimal(24,3), 7:decimal(21,0), 8:decimal(21,0), 9:decimal(21,0), 10:decimal(21,0), 11:decimal(21,0), 12:decimal(21,0), 13:decimal(21,0), 14:decimal(21,0) Statistics: Num rows: 1 Data size: 1456 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(21,0)), _col2 (type: decimal(22,1)), _col3 (type: decimal(23,2)), _col4 (type: decimal(24,3)), _col5 (type: decimal(21,0)), _col6 (type: decimal(21,0)), _col7 (type: decimal(21,0)), _col8 (type: decimal(21,0)), _col9 (type: decimal(21,0)), _col10 (type: decimal(21,0)), _col11 (type: decimal(21,0)), _col12 (type: decimal(21,0)) Execution mode: vectorized, llap @@ -246,10 +246,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [3] + keyColumns: 3:decimal(21,0) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] + valueColumns: 4:decimal(21,0), 5:decimal(22,1), 6:decimal(23,2), 7:decimal(24,3), 8:decimal(25,4), 9:decimal(21,0), 10:decimal(21,0), 11:decimal(21,0), 12:decimal(21,0), 13:decimal(21,0), 14:decimal(21,0), 15:decimal(22,1), 16:decimal(23,2), 17:decimal(24,3), 18:decimal(25,4), 19:decimal(21,0), 20:decimal(21,0), 21:decimal(21,0), 22:decimal(21,0) Statistics: Num rows: 1 Data size: 2240 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(21,0)), _col2 (type: decimal(22,1)), _col3 (type: decimal(23,2)), _col4 (type: decimal(24,3)), _col5 (type: decimal(25,4)), _col6 (type: decimal(21,0)), _col7 (type: decimal(21,0)), _col8 (type: decimal(21,0)), _col9 (type: decimal(21,0)), _col10 (type: decimal(21,0)), _col11 (type: decimal(21,0)), _col12 (type: decimal(22,1)), _col13 (type: decimal(23,2)), _col14 (type: decimal(24,3)), _col15 (type: decimal(25,4)), _col16 (type: decimal(21,0)), _col17 (type: decimal(21,0)), _col18 (type: decimal(21,0)), _col19 (type: decimal(21,0)) Execution mode: vectorized, llap @@ -441,10 +441,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - 
keyColumnNums: [2] + keyColumns: 2:decimal(21,0) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34] + valueColumns: 3:decimal(21,0), 4:decimal(21,0), 5:decimal(21,0), 6:decimal(21,0), 7:decimal(21,0), 8:decimal(21,0), 9:decimal(21,0), 10:decimal(21,0), 11:decimal(21,0), 12:decimal(21,0), 13:decimal(21,0), 14:decimal(21,0), 15:decimal(21,0), 16:decimal(21,0), 17:decimal(21,0), 18:decimal(21,0), 19:decimal(22,1), 20:decimal(23,2), 21:decimal(24,3), 22:decimal(25,4), 23:decimal(26,5), 24:decimal(27,6), 25:decimal(28,7), 26:decimal(29,8), 27:decimal(30,9), 28:decimal(31,10), 29:decimal(32,11), 30:decimal(33,12), 31:decimal(34,13), 32:decimal(35,14), 33:decimal(36,15), 34:decimal(37,16) Statistics: Num rows: 1 Data size: 3808 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(21,0)), _col2 (type: decimal(21,0)), _col3 (type: decimal(21,0)), _col4 (type: decimal(21,0)), _col5 (type: decimal(21,0)), _col6 (type: decimal(21,0)), _col7 (type: decimal(21,0)), _col8 (type: decimal(21,0)), _col9 (type: decimal(21,0)), _col10 (type: decimal(21,0)), _col11 (type: decimal(21,0)), _col12 (type: decimal(21,0)), _col13 (type: decimal(21,0)), _col14 (type: decimal(21,0)), _col15 (type: decimal(21,0)), _col16 (type: decimal(21,0)), _col17 (type: decimal(22,1)), _col18 (type: decimal(23,2)), _col19 (type: decimal(24,3)), _col20 (type: decimal(25,4)), _col21 (type: decimal(26,5)), _col22 (type: decimal(27,6)), _col23 (type: decimal(28,7)), _col24 (type: decimal(29,8)), _col25 (type: decimal(30,9)), _col26 (type: decimal(31,10)), _col27 (type: decimal(32,11)), _col28 (type: decimal(33,12)), _col29 (type: decimal(34,13)), _col31 (type: decimal(35,14)), _col32 (type: decimal(36,15)), _col33 (type: decimal(37,16)) Execution mode: vectorized, llap @@ -625,10 +625,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [3] + keyColumns: 3:decimal(30,9) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [4] + valueColumns: 4:decimal(30,9) Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(30,9)) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_decimal_trailing.q.out ql/src/test/results/clientpositive/llap/vector_decimal_trailing.q.out index b33f090..112def0 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_trailing.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_trailing.q.out @@ -108,10 +108,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS 
true - valueColumnNums: [1, 2] + valueColumns: 1:decimal(10,4), 2:decimal(15,8) Statistics: Num rows: 30 Data size: 4936 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(10,4)), _col2 (type: decimal(15,8)) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out index 59b3c4a..7215f14 100644 --- ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out +++ ql/src/test/results/clientpositive/llap/vector_decimal_udf.q.out @@ -2316,10 +2316,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3] + valueColumns: 1:decimal(30,10), 2:bigint, 3:struct Statistics: Num rows: 18 Data size: 7416 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(30,10)), _col2 (type: bigint), _col3 (type: struct) Execution mode: vectorized, llap @@ -2383,10 +2383,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [5, 3, 1] + valueColumns: 5:decimal(38,18), 3:decimal(24,14), 1:decimal(30,10) Statistics: Num rows: 17 Data size: 5780 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(38,18)), _col2 (type: decimal(24,14)), _col3 (type: decimal(30,10)) Reducer 3 @@ -3256,10 +3256,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2] + valueColumns: 1:struct, 2:struct Statistics: Num rows: 18 Data size: 2952 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: struct), _col2 (type: struct) Execution mode: vectorized, llap @@ -3391,10 +3391,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2] + valueColumns: 1:struct, 2:struct Statistics: Num rows: 18 Data size: 2952 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: struct), _col2 (type: struct) Execution mode: vectorized, llap @@ -3606,10 +3606,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: 
className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:decimal(20,10) Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: decimal(20,10)) Execution mode: vectorized, llap @@ -3737,10 +3736,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:decimal(20,10) Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: decimal(20,10)) Execution mode: vectorized, llap @@ -3868,10 +3866,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap @@ -6254,10 +6251,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3] + valueColumns: 1:decimal(25,3), 2:bigint, 3:struct Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(25,3)), _col2 (type: bigint), _col3 (type: struct) Execution mode: vectorized, llap @@ -6322,10 +6319,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [5, 3, 1] + valueColumns: 5:decimal(38,16), 3:decimal(19,7), 1:decimal(25,3) Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: decimal(38,16)), _col2 (type: decimal(19,7)), _col3 (type: decimal(25,3)) Reducer 3 @@ -7200,10 +7197,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2] + valueColumns: 1:struct, 2:struct Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: struct), _col2 (type: struct) Execution mode: vectorized, llap @@ -7336,10 +7333,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2] + valueColumns: 1:struct, 2:struct Statistics: Num rows: 1 Data size: 116 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: struct), _col2 (type: struct) Execution mode: vectorized, llap @@ -7552,10 +7549,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:decimal(15,3) Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(15,3)) Execution mode: vectorized, llap @@ -7684,10 +7680,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:decimal(15,3) Statistics: Num rows: 1 Data size: 224 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: decimal(15,3)) Execution mode: vectorized, llap @@ -7816,10 +7811,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_full_outer_join.q.out ql/src/test/results/clientpositive/llap/vector_full_outer_join.q.out new file mode 100644 index 0000000..057e2bb --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_full_outer_join.q.out @@ -0,0 +1,1403 @@ +PREHOOK: query: drop table if exists TJOIN1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists TJOIN1 +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table if exists TJOIN2 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table if exists TJOIN2 +POSTHOOK: type: DROPTABLE +PREHOOK: query: 
create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) STORED AS orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN1 +POSTHOOK: query: create table if not exists TJOIN1 (RNUM int , C1 int, C2 int) STORED AS orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN1 +PREHOOK: query: create table if not exists TJOIN2 (RNUM int , C1 int, C2 char(2)) STORED AS orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN2 +POSTHOOK: query: create table if not exists TJOIN2 (RNUM int , C1 int, C2 char(2)) STORED AS orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN2 +PREHOOK: query: create table if not exists TJOIN1STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN1STAGE +POSTHOOK: query: create table if not exists TJOIN1STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN1STAGE +PREHOOK: query: create table if not exists TJOIN2STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@TJOIN2STAGE +POSTHOOK: query: create table if not exists TJOIN2STAGE (RNUM int , C1 int, C2 char(2)) ROW FORMAT DELIMITED FIELDS TERMINATED BY '|' LINES TERMINATED BY '\n' STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@TJOIN2STAGE +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE INTO TABLE TJOIN1STAGE +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tjoin1stage +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin1.txt' OVERWRITE INTO TABLE TJOIN1STAGE +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tjoin1stage +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' OVERWRITE INTO TABLE TJOIN2STAGE +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@tjoin2stage +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/tjoin2.txt' OVERWRITE INTO TABLE TJOIN2STAGE +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@tjoin2stage +PREHOOK: query: INSERT INTO TABLE TJOIN1 SELECT * from TJOIN1STAGE +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1stage +PREHOOK: Output: default@tjoin1 +POSTHOOK: query: INSERT INTO TABLE TJOIN1 SELECT * from TJOIN1STAGE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1stage +POSTHOOK: Output: default@tjoin1 +POSTHOOK: Lineage: tjoin1.c1 SIMPLE [(tjoin1stage)tjoin1stage.FieldSchema(name:c1, type:int, comment:null), ] +POSTHOOK: Lineage: tjoin1.c2 EXPRESSION [(tjoin1stage)tjoin1stage.FieldSchema(name:c2, type:char(2), comment:null), ] +POSTHOOK: Lineage: tjoin1.rnum SIMPLE [(tjoin1stage)tjoin1stage.FieldSchema(name:rnum, type:int, comment:null), ] +_col0 _col1 _col2 +PREHOOK: query: INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin2stage +PREHOOK: Output: default@tjoin2 +POSTHOOK: query: INSERT 
INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin2stage +POSTHOOK: Output: default@tjoin2 +POSTHOOK: Lineage: tjoin2.c1 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c1, type:int, comment:null), ] +POSTHOOK: Lineage: tjoin2.c2 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c2, type:char(2), comment:null), ] +POSTHOOK: Lineage: tjoin2.rnum SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:rnum, type:int, comment:null), ] +tjoin2stage.rnum tjoin2stage.c1 tjoin2stage.c2 +PREHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 4 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Map 1 (SIMPLE_EDGE), Map 4 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: rnum (type: int), c1 (type: int), c2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + input vertices: + 1 Map 4 + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + value expressions: _col0 (type: int), _col2 (type: int) + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: c1 (type: int), c2 (type: char(2)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: char(2)) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: char(2)) + Execution mode: llap + LLAP IO: all inputs + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), KEY.reducesinkkey0 
(type: int), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + input vertices: + 1 Map 4 + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: COMPLETE + fullOuterIntersect: true + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c1j2 c2j2 +0 10 15 10 BB +0 10 15 10 FF +1 20 25 NULL NULL +2 NULL 50 NULL NULL +NULL NULL NULL 15 DD +NULL NULL NULL NULL EE +PREHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: rnum (type: int), c1 (type: int), c2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: int) + Execution mode: llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: c1 (type: int), c2 (type: char(2)) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 4 Data size: 360 Basic 
stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: char(2)) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c1j2 c2j2 +0 10 15 10 BB +0 10 15 10 FF +1 20 25 NULL NULL +2 NULL 50 NULL NULL +NULL NULL NULL 15 DD +NULL NULL NULL NULL EE +PREHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 4 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Map 1 (SIMPLE_EDGE), Map 4 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct] + Select Operator + expressions: rnum (type: int), c1 (type: int), c2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 1:int + bigTableValueExpressions: col 0:int, col 1:int, col 2:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, 
Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + input vertices: + 1 Map 4 + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 2:int + value expressions: _col0 (type: int), _col2 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: rnum:int, c1:int, c2:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, string] + Map 4 + Map Operator Tree: + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:char(2), 3:ROW__ID:struct] + Select Operator + expressions: c1 (type: int), c2 (type: char(2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 2] + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:char(2) + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: char(2)) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:char(2) + Statistics: Num rows: 4 Data size: 360 Basic stats: 
COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: char(2)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [1, 2] + dataColumns: rnum:int, c1:int, c2:char(2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:int, VALUE._col1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, string] + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), KEY.reducesinkkey0 (type: int), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 1:int, col 0:int, col 2:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + input vertices: + 1 Map 4 + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: COMPLETE + fullOuterIntersect: true + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c1j2 c2j2 +0 10 15 10 BB +0 10 15 10 FF +1 20 25 NULL NULL +2 NULL 50 NULL NULL +NULL NULL NULL 15 DD +NULL NULL NULL NULL EE +PREHOOK: query: explain vectorization detail 
+select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct] + Select Operator + expressions: rnum (type: int), c1 (type: int), c2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 2:int + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: rnum:int, c1:int, c2:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 3 + Map Operator Tree: + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:char(2), 3:ROW__ID:struct] + Select Operator + expressions: c1 (type: int), c2 (type: char(2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 2] + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys 
IS true, LazyBinarySerDe for values IS true + valueColumns: 2:char(2) + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: char(2)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [1, 2] + dataColumns: rnum:int, c1:int, c2:char(2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c1j2 c2j2 +0 10 15 10 BB +0 10 15 10 FF +1 20 25 NULL NULL +2 NULL 50 NULL NULL +NULL NULL NULL 15 DD +NULL NULL NULL NULL EE +PREHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 4 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Map 1 (SIMPLE_EDGE), Map 4 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct] + Select Operator + expressions: rnum (type: int), c1 (type: int), c2 (type: int) + 
outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 1:int + bigTableRetainColumnNums: [0, 1, 2] + bigTableValueColumns: 0:int, 1:int, 2:int + className: VectorMapJoinFullOuterLongOperator + fullOuterSmallTableKeyMapping: 0 -> 4 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:int, 1:int, 2:int, 4:int, 5:char(2) + smallTableValueMapping: 5:char(2) + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + input vertices: + 1 Map 4 + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 2:int + value expressions: _col0 (type: int), _col2 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: rnum:int, c1:int, c2:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, string] + Map 4 + Map Operator Tree: + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:char(2), 3:ROW__ID:struct] + Select Operator + expressions: c1 (type: int), c2 (type: char(2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 2] + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:char(2) + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: char(2)) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:char(2) + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: char(2)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [1, 2] + dataColumns: rnum:int, c1:int, c2:char(2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:int, VALUE._col1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, string] + Reduce Operator Tree: + Select Operator + expressions: VALUE._col0 (type: int), KEY.reducesinkkey0 (type: int), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 0, 2] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1, 2] + bigTableValueColumns: 1:int, 0:int, 2:int + className: VectorMapJoinFullOuterIntersectLongOperator + fullOuterSmallTableKeyMapping: 0 -> 3 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 1:int, 0:int, 2:int, 3:int, 4:char(2) + smallTableValueMapping: 4:char(2) + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + input vertices: + 1 Map 4 + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: COMPLETE + fullOuterIntersect: true + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: COMPLETE + 
table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c1j2 c2j2 +0 10 15 10 BB +0 10 15 10 FF +1 20 25 NULL NULL +2 NULL 50 NULL NULL +NULL NULL NULL 15 DD +NULL NULL NULL NULL EE +PREHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct] + Select Operator + expressions: rnum (type: int), c1 (type: int), c2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 2:int + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: 
rnum:int, c1:int, c2:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 3 + Map Operator Tree: + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:char(2), 3:ROW__ID:struct] + Select Operator + expressions: c1 (type: int), c2 (type: char(2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 2] + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:char(2) + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col1 (type: char(2)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [1, 2] + dataColumns: rnum:int, c1:int, c2:char(2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4 + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 710 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c1 as c1j2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c1j2 c2j2 +0 10 15 10 BB +0 10 15 10 FF +1 20 25 NULL NULL +2 NULL 50 NULL NULL +NULL NULL NULL 15 DD +NULL NULL NULL NULL EE +PREHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = 
tjoin2.c1 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct] + Select Operator + expressions: rnum (type: int), c1 (type: int), c2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 2:int + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: rnum:int, c1:int, c2:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 3 + Map Operator Tree: + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:char(2), 3:ROW__ID:struct] + Select Operator + expressions: c1 (type: int), c2 (type: char(2)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1, 2] + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:char(2) + Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: 
COMPLETE + value expressions: _col1 (type: char(2)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [1, 2] + dataColumns: rnum:int, c1:int, c2:char(2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2, _col4 + Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col2 (type: int), _col4 (type: char(2)) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 686 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c2j2 +0 10 15 BB +0 10 15 FF +1 20 25 NULL +2 NULL 50 NULL +NULL NULL NULL DD +NULL NULL NULL EE +PREHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail +select tjoin1.rnum, tjoin1.c1, tjoin1.c2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 3 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: tjoin1 + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct] + Select Operator + expressions: rnum (type: int), c1 (type: int), c2 (type: int) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: 
[0, 1, 2] + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 2:int + Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: int), _col2 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: rnum:int, c1:int, c2:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 3 + Map Operator Tree: + TableScan + alias: tjoin2 + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:char(2), 3:ROW__ID:struct] + Select Operator + expressions: c1 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1] + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 4 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [1] + dataColumns: rnum:int, c1:int, c2:char(2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 7 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 7 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + 
MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +PREHOOK: type: QUERY +PREHOOK: Input: default@tjoin1 +PREHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +POSTHOOK: query: select tjoin1.rnum, tjoin1.c1, tjoin1.c2 from tjoin1 full outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 ) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tjoin1 +POSTHOOK: Input: default@tjoin2 +#### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 +0 10 15 +0 10 15 +1 20 25 +2 NULL 50 +NULL NULL NULL +NULL NULL NULL diff --git ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_fast.q.out ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_fast.q.out new file mode 100644 index 0000000..177bc7d --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_fast.q.out @@ -0,0 +1,8988 @@ +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: Lineage: fullouter_long_big_1a.key SIMPLE [(fullouter_long_big_1a_txt)fullouter_long_big_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: 
query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: Lineage: fullouter_long_big_1a_nonull.key SIMPLE [(fullouter_long_big_1a_nonull_txt)fullouter_long_big_1a_nonull_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1a_txt +PREHOOK: query: CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_small_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_small_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: Lineage: fullouter_long_small_1a.key SIMPLE [(fullouter_long_small_1a_txt)fullouter_long_small_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ] +POSTHOOK: Lineage: fullouter_long_small_1a.s_date SIMPLE [(fullouter_long_small_1a_txt)fullouter_long_small_1a_txt.FieldSchema(name:s_date, type:date, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_small_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_small_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_nonull +POSTHOOK: Lineage: fullouter_long_small_1a_nonull.key SIMPLE [(fullouter_long_small_1a_nonull_txt)fullouter_long_small_1a_nonull_txt.FieldSchema(name:key, type:bigint, comment:null), ] +POSTHOOK: Lineage: fullouter_long_small_1a_nonull.s_date SIMPLE [(fullouter_long_small_1a_nonull_txt)fullouter_long_small_1a_nonull_txt.FieldSchema(name:s_date, type:date, comment:null), ] +PREHOOK: query: analyze table fullouter_long_big_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: query: analyze table fullouter_long_big_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Output: default@fullouter_long_big_1a +PREHOOK: query: analyze table fullouter_long_big_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Output: default@fullouter_long_big_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Output: default@fullouter_long_big_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +PREHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Output: default@fullouter_long_big_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a +PREHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: query: analyze table 
fullouter_long_small_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a +POSTHOOK: Output: default@fullouter_long_small_1a +PREHOOK: query: analyze table fullouter_long_small_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a +PREHOOK: Output: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a +POSTHOOK: Output: default@fullouter_long_small_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a_nonull +PREHOOK: Output: default@fullouter_long_small_1a_nonull +POSTHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +POSTHOOK: Output: default@fullouter_long_small_1a_nonull +PREHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a_nonull +PREHOOK: Output: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +POSTHOOK: Output: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:ROW__ID:struct] + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution 
mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:s_date:date, 2:ROW__ID:struct] + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:bigint, s_date:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:bigint, VALUE._col0:bigint, VALUE._col1:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + 
Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL 
NULL 2098-02-10 +NULL NULL 2242-02-08 +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:ROW__ID:struct] + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + Map Join Vectorization: + bigTableKeyColumns: 0:bigint + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:bigint + className: VectorMapJoinFullOuterLongOperator + fullOuterSmallTableKeyMapping: 0 -> 2 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:bigint, 2:bigint, 3:date + smallTableValueMapping: 3:date + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:bigint, 3:date + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map 
Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:s_date:date, 2:ROW__ID:struct] + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:bigint, s_date:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:bigint, VALUE._col0:bigint, VALUE._col1:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + 
projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + Map Join Vectorization: + bigTableKeyColumns: 0:bigint + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:bigint + className: VectorMapJoinFullOuterIntersectLongOperator + fullOuterSmallTableKeyMapping: 0 -> 1 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:bigint, 1:bigint, 2:date + smallTableValueMapping: 2:date + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + fullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:bigint, 2:date + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a +#### A 
masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 
1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL 
-6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 
5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +PREHOOK: query: CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1b +POSTHOOK: query: CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1b +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1b +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1b +PREHOOK: query: CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1b +POSTHOOK: query: CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1b +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1b +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1b +PREHOOK: query: analyze table fullouter_long_big_1b compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Output: default@fullouter_long_big_1b +POSTHOOK: query: analyze table fullouter_long_big_1b compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Output: default@fullouter_long_big_1b +PREHOOK: query: analyze table fullouter_long_big_1b compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Output: default@fullouter_long_big_1b +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1b compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Output: default@fullouter_long_big_1b +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1b compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1b +PREHOOK: Output: default@fullouter_long_small_1b +POSTHOOK: query: analyze table fullouter_long_small_1b compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1b +POSTHOOK: Output: 
default@fullouter_long_small_1b +PREHOOK: query: analyze table fullouter_long_small_1b compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1b +PREHOOK: Output: default@fullouter_long_small_1b +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1b compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1b +POSTHOOK: Output: default@fullouter_long_small_1b +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:smallint, 1:ROW__ID:struct] + Select Operator + expressions: key (type: smallint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint) + 1 _col0 (type: smallint) + Map Join Vectorization: + bigTableKeyColumns: 0:smallint + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:smallint + className: VectorMapJoinFullOuterLongOperator + fullOuterSmallTableKeyMapping: 0 -> 2 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:smallint, 2:smallint, 3:timestamp + smallTableValueMapping: 3:timestamp + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:smallint, 3:timestamp + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: smallint), _col2 (type: 
timestamp) + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:smallint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, timestamp] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:smallint, 1:s_timestamp:timestamp, 2:ROW__ID:struct] + Select Operator + expressions: key (type: smallint), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:timestamp + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:timestamp + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:smallint, s_timestamp:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + 
Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:smallint, VALUE._col0:smallint, VALUE._col1:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: smallint), VALUE._col1 (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:smallint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, timestamp] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint) + 1 _col0 (type: smallint) + Map Join Vectorization: + bigTableKeyColumns: 0:smallint + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:smallint + className: VectorMapJoinFullOuterIntersectLongOperator + fullOuterSmallTableKeyMapping: 0 -> 1 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:smallint, 1:smallint, 2:timestamp + smallTableValueMapping: 2:timestamp + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + fullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:smallint, 2:timestamp + Statistics: Num 
rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: smallint), _col2 (type: timestamp) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +-25394 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +32030 32030 2101-09-09 07:35:05.145 +NULL -14172 1918-09-13 11:44:24.496926711 +NULL -14172 2355-01-14 23:23:34 +NULL -14172 2809-06-07 02:10:58 +NULL -15361 2219-09-15 20:15:03.000169887 +NULL -15361 2434-08-13 20:37:07.000172979 +NULL -15427 2023-11-09 19:31:21 +NULL -15427 2046-06-07 22:58:40.728 +NULL -15427 2355-01-08 12:34:11.617 +NULL -19167 2230-12-22 20:25:39.000242111 +NULL -19167 2319-08-26 11:07:11.268 +NULL -20517 2233-12-20 04:06:56.666522799 +NULL -20517 2774-06-23 12:04:06.5 +NULL -20824 2478-11-05 00:28:05 +NULL -22422 1949-03-13 00:07:53.075 +NULL -22422 2337-07-19 06:33:02.000353352 +NULL -22422 2982-12-28 06:30:26.000883228 +NULL -23117 2037-01-05 21:52:30.685952759 +NULL -24775 2035-03-26 08:11:23.375224153 +NULL -24775 2920-08-06 15:58:28.261059449 +NULL -26998 2268-08-04 12:48:11.848006292 +NULL -26998 2428-12-26 07:53:45.96925825 +NULL -26998 2926-07-18 09:02:46.077 +NULL -29600 2333-11-02 15:06:30 +NULL -30059 2269-05-04 21:23:44.000339209 +NULL -30059 2420-12-10 22:12:30 +NULL -30059 2713-10-13 09:28:49 +NULL -30306 2619-05-24 10:35:58.000774018 +NULL -4279 2214-09-10 03:53:06 +NULL -4279 2470-08-12 11:21:14.000955747 +NULL -7373 2662-10-28 12:07:02.000526564 +NULL -7624 2219-12-03 17:07:19 +NULL -7624 2289-08-28 00:14:34 +NULL -7624 2623-03-20 03:18:45.00006465 +NULL -8087 2550-06-26 23:57:42.588007617 +NULL -8087 2923-07-02 11:40:26.115 +NULL -8435 2642-02-07 11:45:04.353231638 +NULL -8435 2834-12-06 16:38:18.901 +NULL -8624 2120-02-15 15:36:40.000758423 +NULL -8624 2282-03-28 07:58:16 +NULL -8624 2644-05-04 04:45:07.839 +NULL 10553 2168-05-05 21:10:59.000152113 +NULL 11232 2038-04-06 14:53:59 +NULL 11232 2507-01-27 22:04:22.49661421 +NULL 11232 2533-11-26 12:22:18 +NULL 13598 2421-05-20 14:18:31.000264698 +NULL 13598 2909-06-25 23:22:50 +NULL 14865 2079-10-06 16:54:35.117 +NULL 14865 2220-02-28 03:41:36 +NULL 14865 2943-03-21 00:42:10.505 +NULL 17125 2236-07-14 01:54:40.927230276 +NULL 17125 2629-11-15 15:34:52 +NULL 21181 2253-03-12 11:55:48.332 +NULL 21181 2434-02-20 00:46:29.633 +NULL 21436 2526-09-22 23:44:55 +NULL 21436 2696-05-08 05:19:24.112 +NULL 24870 2752-12-26 12:32:23.03685163 +NULL 2632 2561-12-15 15:42:27 +NULL 26484 1919-03-04 07:32:37.519 +NULL 26484 2953-03-10 02:05:26.508953676 +NULL 2748 2298-06-20 21:01:24 +NULL 2748 2759-02-13 18:04:36.000307355 +NULL 2748 2862-04-20 13:12:39.482805897 +NULL 29407 2385-12-14 06:03:39.597 +NULL 3198 2223-04-14 13:20:49 +NULL 3198 2428-06-13 16:21:33.955 +NULL 3198 
2736-12-20 03:59:50.343550301 +NULL 4510 2293-01-17 13:47:41.00001006 +NULL 4510 2777-03-24 03:44:28.000169723 +NULL NULL 2124-05-07 15:01:19.021 +NULL NULL 2933-06-20 11:48:09.000839488 +NULL NULL 2971-08-07 12:02:11.000948152 +NULL NULL NULL +PREHOOK: query: CREATE TABLE fullouter_long_big_1c(key int, b_string string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1c +POSTHOOK: query: CREATE TABLE fullouter_long_big_1c(key int, b_string string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1c +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1c +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1c +PREHOOK: query: CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1c +POSTHOOK: query: CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1c +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1c +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1c +PREHOOK: query: analyze table fullouter_long_big_1c compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Output: default@fullouter_long_big_1c +POSTHOOK: query: analyze table fullouter_long_big_1c compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Output: default@fullouter_long_big_1c +PREHOOK: query: analyze table fullouter_long_big_1c compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Output: default@fullouter_long_big_1c +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1c compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Output: default@fullouter_long_big_1c +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1c compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1c +PREHOOK: Output: default@fullouter_long_small_1c +POSTHOOK: query: analyze table fullouter_long_small_1c compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1c +POSTHOOK: Output: default@fullouter_long_small_1c +PREHOOK: query: analyze table fullouter_long_small_1c compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: 
Input: default@fullouter_long_small_1c +PREHOOK: Output: default@fullouter_long_small_1c +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1c compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1c +POSTHOOK: Output: default@fullouter_long_small_1c +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:b_string:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: int), b_string (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:string + className: VectorMapJoinFullOuterLongOperator + fullOuterSmallTableKeyMapping: 0 -> 3 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:int, 1:string, 3:int, 4:decimal(38,18) + smallTableValueMapping: 4:decimal(38,18) + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string, 3:int, 4:decimal(38,18) + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: decimal(38,18)) + Reduce Output Operator + key 
expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, b_string:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, decimal(38,18)] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:s_decimal:decimal(38,18), 2:ROW__ID:struct] + Select Operator + expressions: key (type: int), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:decimal(38,18) + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(38,18)) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:decimal(38,18) + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(38,18)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, s_decimal:decimal(38,18) + partitionColumnCount: 0 + 
scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:string, VALUE._col1:int, VALUE._col2:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, decimal(38,18)] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:string + className: VectorMapJoinFullOuterIntersectLongOperator + fullOuterSmallTableKeyMapping: 0 -> 2 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:int, 1:string, 2:int, 3:decimal(38,18) + smallTableValueMapping: 3:decimal(38,18) + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + fullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string, 2:int, 3:decimal(38,18) + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: decimal(38,18)) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +-1437463633 JU NULL NULL +-1437463633 NULL NULL NULL +-1437463633 SOWDWMS NULL NULL +-1437463633 TKTKGVGFW NULL NULL +-1437463633 YYXPPCH NULL NULL +1725068083 MKSCCE NULL NULL +1928928239 AMKTIWQ NULL NULL +1928928239 NULL NULL NULL +1928928239 NULL NULL NULL +1928928239 VAQHVRI NULL NULL +NULL ABBZ NULL NULL +NULL NULL -1093006502 -69.556658280000000000 +NULL NULL -1197550983 -0.558879692200000000 +NULL NULL -1197550983 0.100000000000000000 +NULL NULL -1197550983 71852.833867441261300000 +NULL NULL -1250662632 -544.554649000000000000 +NULL NULL -1250662632 5454127198.951479000000000000 +NULL NULL -1250662632 93104.000000000000000000 +NULL NULL -1264372462 -6993985240226.000000000000000000 +NULL NULL -1264372462 -899.000000000000000000 +NULL NULL -1264372462 0.883000000000000000 +NULL NULL -1490239076 92253.232096000000000000 +NULL NULL -1681455031 -11105.372477000000000000 +NULL NULL -1681455031 -6.454300000000000000 +NULL NULL -1740848088 -9.157000000000000000 +NULL NULL -1740848088 0.506394259000000000 +NULL NULL -1740848088 901.441000000000000000 +NULL NULL -2048404259 -0.322296044625100000 +NULL NULL -2048404259 3939387044.100000000000000000 +NULL NULL -2123273881 -55.891980000000000000 +NULL NULL -2123273881 3.959000000000000000 +NULL NULL -243940373 -583.258000000000000000 +NULL NULL -243940373 -97176129669.654953000000000000 +NULL NULL -369457052 560.119078830904550000 +NULL NULL -369457052 7.700000000000000000 +NULL NULL -424713789 0.480000000000000000 +NULL NULL -466171792 0.000000000000000000 +NULL NULL -466171792 4227.534400000000000000 +NULL NULL -466171792 69.900000000000000000 +NULL NULL -477147437 6.000000000000000000 +NULL NULL -793950320 -0.100000000000000000 +NULL NULL -793950320 -16.000000000000000000 +NULL NULL -934092157 -7843850349.571300380000000000 +NULL NULL -99948814 -38076694.398100000000000000 +NULL NULL -99948814 -96386.438000000000000000 +NULL NULL 1039864870 0.700000000000000000 +NULL NULL 1039864870 94.040000000000000000 +NULL NULL 1039864870 987601.570000000000000000 +NULL NULL 1091836730 -5017.140000000000000000 +NULL NULL 1091836730 0.020000000000000000 +NULL NULL 1242586043 -4.000000000000000000 +NULL NULL 1242586043 -749975924224.630000000000000000 +NULL NULL 1242586043 71.148500000000000000 +NULL NULL 1479580778 92077343080.700000000000000000 +NULL NULL 150678276 -8278.000000000000000000 +NULL NULL 150678276 15989394.843600000000000000 +NULL NULL 1519948464 152.000000000000000000 +NULL NULL 1561921421 
-5.405000000000000000 +NULL NULL 1561921421 53050.550000000000000000 +NULL NULL 1585021913 -5762331.066971120000000000 +NULL NULL 1585021913 607.227470000000000000 +NULL NULL 1585021913 745222.668089540000000000 +NULL NULL 1719049112 -7888197.000000000000000000 +NULL NULL 1738753776 -99817635066320.241600000000000000 +NULL NULL 1738753776 1525.280459649262000000 +NULL NULL 1755897735 -39.965207000000000000 +NULL NULL 1785750809 47443.115000000000000000 +NULL NULL 1801735854 -1760956929364.267000000000000000 +NULL NULL 1801735854 -438541294.700000000000000000 +NULL NULL 1816559437 -1035.700900000000000000 +NULL NULL 1909136587 -8610.078036935181000000 +NULL NULL 1909136587 181.076815359440000000 +NULL NULL 193709887 -0.566300000000000000 +NULL NULL 193709887 -19889.830000000000000000 +NULL NULL 193709887 0.800000000000000000 +NULL NULL 284554389 5.727146000000000000 +NULL NULL 294598722 -3542.600000000000000000 +NULL NULL 294598722 -9377326244.444000000000000000 +NULL NULL 448130683 -4302.485366846491000000 +NULL NULL 452719211 3020.293893074463600000 +NULL NULL 452719211 83003.437220000000000000 +NULL NULL 466567142 -58810.605860000000000000 +NULL NULL 466567142 -9763217822.129028000000000000 +NULL NULL 466567142 196.578529539858400000 +NULL NULL 560745412 678.250000000000000000 +NULL NULL 698032489 -330457.429262583900000000 +NULL NULL 891262439 -0.040000000000000000 +NULL NULL 90660785 -4564.517185000000000000 +NULL NULL 90660785 12590.288613000000000000 +NULL NULL NULL 1.089120893565337000 +NULL NULL NULL 4.261652270000000000 +NULL NULL NULL 682070836.264960300000000000 +PREHOOK: query: CREATE TABLE fullouter_long_big_1d(key int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1d +POSTHOOK: query: CREATE TABLE fullouter_long_big_1d(key int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1d +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1d +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1d +PREHOOK: query: CREATE TABLE fullouter_long_small_1d(key int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1d +POSTHOOK: query: CREATE TABLE fullouter_long_small_1d(key int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1d +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1d +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1d +PREHOOK: query: analyze table fullouter_long_big_1d compute statistics +PREHOOK: type: QUERY +PREHOOK: 
Input: default@fullouter_long_big_1d +PREHOOK: Output: default@fullouter_long_big_1d +POSTHOOK: query: analyze table fullouter_long_big_1d compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Output: default@fullouter_long_big_1d +PREHOOK: query: analyze table fullouter_long_big_1d compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Output: default@fullouter_long_big_1d +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1d compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Output: default@fullouter_long_big_1d +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1d compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1d +PREHOOK: Output: default@fullouter_long_small_1d +POSTHOOK: query: analyze table fullouter_long_small_1d compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1d +POSTHOOK: Output: default@fullouter_long_small_1d +PREHOOK: query: analyze table fullouter_long_small_1d compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1d +PREHOOK: Output: default@fullouter_long_small_1d +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1d compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1d +POSTHOOK: Output: default@fullouter_long_small_1d +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:ROW__ID:struct] + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int + className: VectorMapJoinFullOuterLongOperator + fullOuterSmallTableKeyMapping: 0 -> 2 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin 
Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:int, 2:int + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 5 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:int + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:ROW__ID:struct] + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 39 Data size: 381 
Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int + className: VectorMapJoinFullOuterIntersectLongOperator + fullOuterSmallTableKeyMapping: 0 -> 1 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:int, 1:int + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 5 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + fullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + 
Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:int + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +-1780951928 NULL +-2038654700 -2038654700 +-670834064 NULL +-702028721 NULL +-702028721 NULL +-702028721 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +NULL -1003639073 +NULL -1014271154 +NULL -1036083124 +NULL -1210744742 +NULL -1323620496 +NULL -1379355738 +NULL -1712018127 +NULL -1792852276 +NULL -1912571616 +NULL -497171161 +NULL -683339273 +NULL -707688773 +NULL -747044796 +NULL -894799664 +NULL -932176731 +NULL 103640700 +NULL 1164387380 +NULL 1372592319 +NULL 1431997749 +NULL 1614287784 +NULL 162858059 +NULL 1635405412 +NULL 1685473722 +NULL 1780951928 +NULL 1825107160 +NULL 1831520491 +NULL 1840266070 +NULL 1997943409 +NULL 2119085509 +NULL 246169862 +NULL 260588085 +NULL 41376947 +NULL 436878811 +NULL 533298451 +NULL 670834064 +NULL 699007128 +NULL 699863556 +NULL NULL +NULL NULL +NULL NULL +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: 
default@fullouter_multikey_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a +POSTHOOK: Lineage: fullouter_multikey_big_1a.key0 SIMPLE [(fullouter_multikey_big_1a_txt)fullouter_multikey_big_1a_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1a.key1 SIMPLE [(fullouter_multikey_big_1a_txt)fullouter_multikey_big_1a_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Lineage: fullouter_multikey_big_1a_nonull.key0 SIMPLE [(fullouter_multikey_big_1a_nonull_txt)fullouter_multikey_big_1a_nonull_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1a_nonull.key1 SIMPLE [(fullouter_multikey_big_1a_nonull_txt)fullouter_multikey_big_1a_nonull_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt +POSTHOOK: type: LOAD +#### A 
masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_small_1a_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_small_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_small_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a +POSTHOOK: Lineage: fullouter_multikey_small_1a.key0 SIMPLE [(fullouter_multikey_small_1a_txt)fullouter_multikey_small_1a_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1a.key1 SIMPLE [(fullouter_multikey_small_1a_txt)fullouter_multikey_small_1a_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull +POSTHOOK: Lineage: fullouter_multikey_small_1a_nonull.key0 SIMPLE [(fullouter_multikey_small_1a_nonull_txt)fullouter_multikey_small_1a_nonull_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1a_nonull.key1 SIMPLE [(fullouter_multikey_small_1a_nonull_txt)fullouter_multikey_small_1a_nonull_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: analyze table fullouter_multikey_big_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Output: default@fullouter_multikey_big_1a +POSTHOOK: query: analyze table fullouter_multikey_big_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: 
Input: default@fullouter_multikey_big_1a +POSTHOOK: Output: default@fullouter_multikey_big_1a +PREHOOK: query: analyze table fullouter_multikey_big_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Output: default@fullouter_multikey_big_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_big_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Output: default@fullouter_multikey_big_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull +POSTHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull +PREHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_small_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a +PREHOOK: Output: default@fullouter_multikey_small_1a +POSTHOOK: query: analyze table fullouter_multikey_small_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1a +POSTHOOK: Output: default@fullouter_multikey_small_1a +PREHOOK: query: analyze table fullouter_multikey_small_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a +PREHOOK: Output: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_small_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1a +POSTHOOK: Output: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull +POSTHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull +PREHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +PREHOOK: 
query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct] + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:smallint, 1:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:smallint, 1:int + className: VectorMapJoinFullOuterMultiKeyOperator + fullOuterSmallTableKeyMapping: 0 -> 3, 1 -> 4 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:smallint, 1:int, 3:smallint, 4:int + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 3:smallint, 4:int + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, 
spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key0:smallint, key1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct] + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key0:smallint, key1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int, VALUE._col0:smallint, VALUE._col1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: 
KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:smallint, 1:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:smallint, 1:int + className: VectorMapJoinFullOuterIntersectMultiKeyOperator + fullOuterSmallTableKeyMapping: 0 -> 2, 1 -> 3 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:smallint, 1:int, 2:smallint, 3:int + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + fullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:smallint, 3:int + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order 
by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +NULL NULL NULL NULL +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN 
fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL 
OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL NULL +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 
FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +PREHOOK: query: CREATE TABLE 
fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_big_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1b +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_big_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1b +POSTHOOK: Lineage: fullouter_multikey_big_1b.key0 SIMPLE [(fullouter_multikey_big_1b_txt)fullouter_multikey_big_1b_txt.FieldSchema(name:key0, type:timestamp, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1b.key1 SIMPLE [(fullouter_multikey_big_1b_txt)fullouter_multikey_big_1b_txt.FieldSchema(name:key1, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1b.key2 SIMPLE [(fullouter_multikey_big_1b_txt)fullouter_multikey_big_1b_txt.FieldSchema(name:key2, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_small_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: 
default@fullouter_multikey_small_1b +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_small_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1b +POSTHOOK: Lineage: fullouter_multikey_small_1b.key0 SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:key0, type:timestamp, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1b.key1 SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:key1, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1b.key2 SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:key2, type:string, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1b.s_decimal SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:s_decimal, type:decimal(38,18), comment:null), ] +PREHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b_txt +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +POSTHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b_txt +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +PREHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b_txt +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b_txt +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1b_txt +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +POSTHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1b_txt +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +PREHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1b_txt +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1b_txt +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = 
s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:timestamp, 1:key1:smallint, 2:key2:string, 3:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + 1 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:timestamp, 1:smallint, 2:string + bigTableRetainColumnNums: [0, 1, 2] + bigTableValueColumns: 0:timestamp, 1:smallint, 2:string + className: VectorMapJoinFullOuterMultiKeyOperator + fullOuterSmallTableKeyMapping: 0 -> 4, 1 -> 5, 2 -> 6 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:timestamp, 1:smallint, 2:string, 4:timestamp, 5:smallint, 6:string, 7:decimal(38,18) + smallTableValueMapping: 7:decimal(38,18) + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + input vertices: + 1 Map 5 + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:timestamp, 1:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:string, 4:timestamp, 5:smallint, 6:string, 7:decimal(38,18) + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: timestamp), _col4 (type: smallint), _col5 (type: string), _col6 (type: decimal(38,18)) + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:timestamp, 1:smallint, 2:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS
true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key0:timestamp, key1:smallint, key2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [timestamp, bigint, string, decimal(38,18)] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:timestamp, 1:key1:smallint, 2:key2:string, 3:s_decimal:decimal(38,18), 4:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:timestamp, 1:smallint, 2:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 3:decimal(38,18) + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(38,18)) + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:timestamp, 1:smallint, 2:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 3:decimal(38,18) + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(38,18)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + includeColumns: [0, 1, 2, 3] + dataColumns: key0:timestamp, key1:smallint, key2:string, s_decimal:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode:
vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 7 + dataColumns: KEY.reducesinkkey0:timestamp, KEY.reducesinkkey1:smallint, VALUE._col0:string, VALUE._col1:timestamp, VALUE._col2:smallint, VALUE._col3:string, VALUE._col4:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), VALUE._col0 (type: string), VALUE._col1 (type: timestamp), VALUE._col2 (type: smallint), VALUE._col3 (type: string), VALUE._col4 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:timestamp, KEY.reducesinkkey1:smallint, KEY.reducesinkkey2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [timestamp, bigint, string, decimal(38,18)] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + 1 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:timestamp, 1:smallint, 2:string + bigTableRetainColumnNums: [0, 1, 2] + bigTableValueColumns: 0:timestamp, 1:smallint, 2:string + className: VectorMapJoinFullOuterIntersectMultiKeyOperator + fullOuterSmallTableKeyMapping: 0 -> 3, 1 -> 4, 2 -> 5 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:timestamp, 1:smallint, 2:string, 3:timestamp, 4:smallint, 5:string, 6:decimal(38,18) + smallTableValueMapping: 6:decimal(38,18) + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2, _col3, 
_col4, _col5, _col6 + input vertices: + 1 Map 5 + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + fullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:timestamp, 1:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:string, 3:timestamp, 4:smallint, 5:string, 6:decimal(38,18) + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: timestamp), _col4 (type: smallint), _col5 (type: string), _col6 (type: decimal(38,18)) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b +PREHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b +POSTHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 21635 ANCO NULL NULL NULL NULL +2082-07-14 04:00:40.695380469 12556 NCYBDW NULL NULL NULL NULL +2093-04-10 23:36:54.846 1446 GHZVPWFO NULL NULL NULL NULL +2093-04-10 23:36:54.846 28996 Q NULL NULL NULL NULL +2093-04-10 23:36:54.846 NULL NULL NULL NULL NULL NULL +2188-06-04 15:03:14.963259704 9468 AAA 2188-06-04 15:03:14.963259704 9468 AAA 2.754963520000000000 +2299-11-15 16:41:30.401 -31077 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 -6909 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 1446 NULL NULL NULL NULL NULL +2608-02-23 23:44:02.546440891 26184 NCYBDW NULL NULL NULL NULL +2686-05-23 07:46:46.565832918 13212 NCYBDW 2686-05-23 07:46:46.565832918 13212 NCYBDW -917116793.400000000000000000 +2686-05-23 07:46:46.565832918 NULL GHZVPWFO NULL NULL NULL NULL +2898-10-01 22:27:02.000871113 10361 NCYBDW NULL NULL NULL NULL +NULL -6909 NULL NULL NULL NULL NULL +NULL 21635 ANCO NULL NULL NULL NULL +NULL NULL CCWYD NULL NULL NULL NULL +NULL NULL NULL 1905-04-20 13:42:24.000469776 2638 KAUUFF 7.000000000000000000 +NULL NULL NULL 1919-06-20 00:16:50.611028595 20223 ZKBC -23.000000000000000000 +NULL NULL NULL 1931-12-04 11:13:47.269597392 23196 HVJCQMTQL -9697532.899400000000000000 +NULL NULL NULL 1941-10-16 02:19:35.000423663 -24459 AO -821445414.457971200000000000 +NULL NULL NULL 1957-02-01 14:00:28.000548421 -16085 ZVEUKC -2312.814900000000000000 +NULL NULL NULL 1957-03-06 09:57:31 -26373 NXLNNSO 2.000000000000000000 +NULL NULL NULL 1980-09-13 19:57:15 NULL M 57650.772300000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 7506645.953700000000000000 +NULL NULL NULL 
2018-11-25 22:27:55.84 -12202 VBDBM 98790.713907420831000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -22419 LOTLS 342.372604022858400000 +NULL NULL NULL 2038-10-12 09:15:33.000539653 -19598 YKNIAJW -642807895924.660000000000000000 +NULL NULL NULL 2044-05-02 07:00:03.35 -8751 ZSMB -453797242.029791752000000000 +NULL NULL NULL 2071-07-21 20:02:32.000250697 2638 NRUV -66198.351092000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 5.000000000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 726945733.419300000000000000 +NULL NULL NULL 2075-10-25 20:32:40.000792874 NULL NULL 226612651968.360760000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR -394.086700000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR 67892053.023760940000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV -85184687349898.892000000000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 0.439686100000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 482.538341135921900000 +NULL NULL NULL 2105-01-04 16:27:45 23100 ZSMB -83.232800000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 NULL -9784.820000000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 UANGISEXR -5996.306000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -1515597428.000000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -4016.960800000000000000 +NULL NULL NULL 2201-07-05 17:22:06.084206844 -24459 UBGT 1.506948328200000000 +NULL NULL NULL 2238-05-17 19:27:25.519 20223 KQCM -0.010950000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ -0.261490000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ 37.728800000000000000 +NULL NULL NULL 2266-09-26 06:27:29.000284762 20223 EDYJJN 14.000000000000000000 +NULL NULL NULL 2301-06-03 17:16:19 15332 ZVEUKC 0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 -13125 JFYW 6.086657000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR 1279917802.420000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 12587 OPW -4.594895040000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T -0.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 2720.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 61.302000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G -4319470286240016.300000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G 975.000000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 30285 GSJPSIYOU 0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO -0.435500000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO 0.713517473350000000 +NULL NULL NULL 2309-01-15 12:43:49 22821 ZMY 40.900000000000000000 +NULL NULL NULL 2332-06-14 07:02:42.32 -26373 XFFFDTQ 56845106806308.900000000000000000 +NULL NULL NULL 2333-07-28 09:59:26 23196 RKSK 37872288434740893.500000000000000000 +NULL NULL NULL 2338-02-12 09:30:07 20223 CTH -6154.763054000000000000 +NULL NULL NULL 2340-12-15 05:15:17.133588982 23663 HHTP 33383.800000000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 74179461.880493000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 92.150000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -32.460000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -472.000000000000000000 +NULL NULL NULL 2391-01-17 15:28:37.00045143 
16160 ZVEUKC 771355639420297.133000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB -5151598.347000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB 0.767183260000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -162.950000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -9926693851.000000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR 0.400000000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR -769088.176482000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR 93262.914526611000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -9575827.553960000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -991.436050000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB 8694.890000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB -582687.000000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB 67.417990000000000000 +NULL NULL NULL 2467-05-11 06:04:13.426693647 23196 EIBSDASR -8.554888380100000000 +NULL NULL NULL 2480-10-02 09:31:37.000770961 -26373 NBN -5875.519725200000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ -49.512190000000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ 0.445800000000000000 +NULL NULL NULL 2512-10-06 03:03:03 13195 CRJ 14.000000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X -922.695158410700000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X 761196.522000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 24313 QBHUG -8423.151573236000000000 +NULL NULL NULL 2512-10-06 03:03:03 32099 ARNZ -0.410000000000000000 +NULL NULL NULL 2525-05-12 15:59:35 -24459 SAVRGA 53106747151.863300000000000000 +NULL NULL NULL 2535-03-01 05:04:49.000525883 23663 ALIQKNXHE -0.166569100000000000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 41.774515077866460000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 6.801285170800000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -12923 PPTJPFR 5.400000000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -17786 HYEGQ -84.169614329419000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 7362887891522.378200000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 749563668434009.650000000000000000 +NULL NULL NULL 2668-06-25 07:12:37.000970744 2638 TJE -2.779682700000000000 +NULL NULL NULL 2688-02-06 20:58:42.000947837 20223 PAIY 67661.735000000000000000 +NULL NULL NULL 2743-12-27 05:16:19.000573579 -12914 ZVEUKC -811984611.517849700000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 368.000000000000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 60.602579700000000000 +NULL NULL NULL 2808-07-09 02:10:11.928498854 -19598 FHFX 0.300000000000000000 +NULL NULL NULL 2829-06-04 08:01:47.836 22771 ZVEUKC 94317.753180000000000000 +NULL NULL NULL 2861-05-27 07:13:01.000848622 -19598 WKPXNLXS 29399.000000000000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -4244.926206619000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -9951044.000000000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC -56082455.033918000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC 57.621752577880370000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 51.732330327300000000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 6370.000000000000000000 +NULL NULL NULL 2898-12-18 03:37:17 -24459 MHNBXPBM 14.236693562384810000 +NULL NULL NULL 2913-07-17 15:06:58.041 -10206 NULL -0.200000000000000000 +NULL NULL NULL 
2938-12-21 23:35:59.498 29362 ZMY 0.880000000000000000 +NULL NULL NULL 2957-05-07 10:41:46 20223 OWQT -586953.153681000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF -96.300000000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF 2.157765900000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -18138 VDPN 8924831210.427680190000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -32485 AGEPWWLJF -48431309405.652522000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -375994644577.315257000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -81.000000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ 9.178000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 14500 WXLTRFQP -23.819800000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 6689 TFGVOGPJF -0.010000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -27394351.300000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -5.751278023000000000 +NULL NULL NULL NULL -12914 ZVEUKC 221.000000000000000000 +NULL NULL NULL NULL NULL NULL -2.400000000000000000 +NULL NULL NULL NULL NULL NULL -2207.300000000000000000 +NULL NULL NULL NULL NULL NULL NULL +PREHOOK: query: CREATE TABLE fullouter_string_big_1a_txt(key string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a_txt(key string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a +POSTHOOK: Lineage: fullouter_string_big_1a.key SIMPLE [(fullouter_string_big_1a_txt)fullouter_string_big_1a_txt.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull_txt(key string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull_txt(key string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH 
'../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a_nonull +POSTHOOK: Lineage: fullouter_string_big_1a_nonull.key SIMPLE [(fullouter_string_big_1a_nonull_txt)fullouter_string_big_1a_nonull_txt.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_small_1a_txt +PREHOOK: query: CREATE TABLE fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_small_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_small_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a +POSTHOOK: Lineage: fullouter_string_small_1a.key SIMPLE [(fullouter_string_small_1a_txt)fullouter_string_small_1a_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a.s_date SIMPLE [(fullouter_string_small_1a_txt)fullouter_string_small_1a_txt.FieldSchema(name:s_date, type:date, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a.s_timestamp SIMPLE [(fullouter_string_small_1a_txt)fullouter_string_small_1a_txt.FieldSchema(name:s_timestamp, type:timestamp, comment:null), ] +PREHOOK: query: CREATE TABLE 
fullouter_string_small_1a_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_small_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_small_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_small_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_small_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a_nonull +POSTHOOK: Lineage: fullouter_string_small_1a_nonull.key SIMPLE [(fullouter_string_small_1a_nonull_txt)fullouter_string_small_1a_nonull_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a_nonull.s_date SIMPLE [(fullouter_string_small_1a_nonull_txt)fullouter_string_small_1a_nonull_txt.FieldSchema(name:s_date, type:date, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a_nonull.s_timestamp SIMPLE [(fullouter_string_small_1a_nonull_txt)fullouter_string_small_1a_nonull_txt.FieldSchema(name:s_timestamp, type:timestamp, comment:null), ] +PREHOOK: query: analyze table fullouter_string_big_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Output: default@fullouter_string_big_1a +POSTHOOK: query: analyze table fullouter_string_big_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Output: default@fullouter_string_big_1a +PREHOOK: query: analyze table fullouter_string_big_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Output: default@fullouter_string_big_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_big_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Output: default@fullouter_string_big_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Output: default@fullouter_string_big_1a_nonull +POSTHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics +POSTHOOK: 
type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Output: default@fullouter_string_big_1a_nonull +PREHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Output: default@fullouter_string_big_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Output: default@fullouter_string_big_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_string_small_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a +PREHOOK: Output: default@fullouter_string_small_1a +POSTHOOK: query: analyze table fullouter_string_small_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a +POSTHOOK: Output: default@fullouter_string_small_1a +PREHOOK: query: analyze table fullouter_string_small_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a +PREHOOK: Output: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_small_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a +POSTHOOK: Output: default@fullouter_string_small_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a_nonull +PREHOOK: Output: default@fullouter_string_small_1a_nonull +POSTHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +POSTHOOK: Output: default@fullouter_string_small_1a_nonull +PREHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a_nonull +PREHOOK: Output: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +POSTHOOK: Output: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + 
alias: b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:string + className: VectorMapJoinFullOuterStringOperator + fullOuterSmallTableKeyMapping: 0 -> 2 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:string, 2:string, 3:date, 4:timestamp + smallTableValueMapping: 3:date, 4:timestamp + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:string, 3:date, 4:timestamp + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:string + partitionColumnCount: 0 + scratchColumnTypeNames: [string, bigint, timestamp] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:s_date:date, 2:s_timestamp:timestamp, 3:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: +
className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date, 2:timestamp + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date, 2:timestamp + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:string, s_date:date, s_timestamp:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, VALUE._col0:string, VALUE._col1:date, VALUE._col2:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:string + partitionColumnCount: 0 + scratchColumnTypeNames: [string, bigint, timestamp] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:string + className: VectorMapJoinFullOuterIntersectStringOperator + fullOuterSmallTableKeyMapping: 0 -> 1 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:string, 1:string, 2:date, 3:timestamp + smallTableValueMapping: 2:date, 3:timestamp + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + fullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string, 2:date, 3:timestamp + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 
+NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 
1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order 
by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:ROW__ID:struct] + Select Operator + 
expressions: key (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:s_date:date, 2:ROW__ID:struct] + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:bigint, s_date:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Reduce Operator 
Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: bigint) + 1 KEY.reducesinkkey0 (type: bigint) + Map Join Vectorization: + bigTableKeyColumns: 0:bigint + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:bigint + className: VectorMapJoinFullOuterLongOperator + fullOuterSmallTableKeyMapping: 0 -> 1 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:bigint, 1:bigint, 2:date + smallTableValueMapping: 2:date + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 4 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:bigint, 2:date + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:bigint, VALUE._col0:bigint, VALUE._col1:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a 
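-- The plan above compiles the bigint FULL OUTER JOIN as a dynamically partitioned
-- hash join (DynamicPartitionHashJoin: true) executed by VectorMapJoinFullOuterLongOperator
-- in Reducer 2, rather than as a broadcast map join. A minimal sketch of a session that
-- should satisfy the nativeConditionsMet entries printed in the plan; every SET value
-- below is an assumption mirroring those entries, not part of the recorded test run.
set hive.execution.engine=tez;
set hive.vectorized.execution.enabled=true;
set hive.vectorized.execution.reduce.enabled=true;
set hive.vectorized.execution.reducesink.new.enabled=true;
set hive.vectorized.execution.mapjoin.native.enabled=true;
set hive.mapjoin.optimized.hashtable=true;

EXPLAIN VECTORIZATION DETAIL
SELECT b.key, s.key, s.s_date
FROM fullouter_long_big_1a b
FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key
ORDER BY b.key;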
+POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:ROW__ID:struct] + Select Operator + expressions: key (type: bigint) + 
outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:s_date:date, 2:ROW__ID:struct] + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:bigint, s_date:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Reduce Operator Tree: + Map Join Operator + 
condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: bigint) + 1 KEY.reducesinkkey0 (type: bigint) + Map Join Vectorization: + bigTableKeyColumns: 0:bigint + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:bigint + className: VectorMapJoinFullOuterLongOperator + fullOuterSmallTableKeyMapping: 0 -> 1 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:bigint, 1:bigint, 2:date + smallTableValueMapping: 2:date + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 4 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:bigint, 2:date + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:bigint, VALUE._col0:bigint, VALUE._col1:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull 
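-- The 1a vs. 1a_nonull result sets differ only in rows whose join key is NULL: under
-- SQL semantics, b.key = s.key is never true when either side is NULL, so NULL-keyed
-- rows can only surface as unmatched FULL OUTER rows (the "NULL ... NULL" lines),
-- never as matches. A hedged sketch that isolates this, reusing the test tables;
-- the column alias is illustrative only.
SELECT COUNT(*) AS null_key_rows
FROM fullouter_long_small_1a s
WHERE s.key IS NULL;

-- An inner join over the same key drops every NULL-keyed row on both sides:
SELECT b.key, s.key, s.s_date
FROM fullouter_long_big_1a b
JOIN fullouter_long_small_1a s ON b.key = s.key;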
+POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:ROW__ID:struct] + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + 
Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:s_date:date, 2:ROW__ID:struct] + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:bigint, s_date:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 
0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: bigint) + 1 KEY.reducesinkkey0 (type: bigint) + Map Join Vectorization: + bigTableKeyColumns: 0:bigint + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:bigint + className: VectorMapJoinFullOuterLongOperator + fullOuterSmallTableKeyMapping: 0 -> 1 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:bigint, 1:bigint, 2:date + smallTableValueMapping: 2:date + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 4 + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:bigint, 2:date + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:bigint, VALUE._col0:bigint, VALUE._col1:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: 
default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:ROW__ID:struct] + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: 
VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:s_date:date, 2:ROW__ID:struct] + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:bigint, s_date:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 
KEY.reducesinkkey0 (type: bigint) + 1 KEY.reducesinkkey0 (type: bigint) + Map Join Vectorization: + bigTableKeyColumns: 0:bigint + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:bigint + className: VectorMapJoinFullOuterLongOperator + fullOuterSmallTableKeyMapping: 0 -> 1 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:bigint, 1:bigint, 2:date + smallTableValueMapping: 2:date + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 4 + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:bigint, 2:date + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:bigint, VALUE._col0:bigint, VALUE._col1:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: 
default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:smallint, 1:ROW__ID:struct] + Select Operator + expressions: key (type: smallint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + 
projectedOutputColumnNums: [0] + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:smallint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:smallint, 1:s_timestamp:timestamp, 2:ROW__ID:struct] + Select Operator + expressions: key (type: smallint), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:timestamp + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:smallint, s_timestamp:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: 
KEY.reducesinkkey0:smallint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, timestamp] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: smallint) + 1 KEY.reducesinkkey0 (type: smallint) + Map Join Vectorization: + bigTableKeyColumns: 0:smallint + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:smallint + className: VectorMapJoinFullOuterLongOperator + fullOuterSmallTableKeyMapping: 0 -> 1 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:smallint, 1:smallint, 2:timestamp + smallTableValueMapping: 2:timestamp + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 4 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:smallint, 2:timestamp + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: smallint), _col2 (type: timestamp) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:smallint, VALUE._col0:smallint, VALUE._col1:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: smallint), VALUE._col1 (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, 
s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +-25394 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +32030 32030 2101-09-09 07:35:05.145 +NULL -14172 1918-09-13 11:44:24.496926711 +NULL -14172 2355-01-14 23:23:34 +NULL -14172 2809-06-07 02:10:58 +NULL -15361 2219-09-15 20:15:03.000169887 +NULL -15361 2434-08-13 20:37:07.000172979 +NULL -15427 2023-11-09 19:31:21 +NULL -15427 2046-06-07 22:58:40.728 +NULL -15427 2355-01-08 12:34:11.617 +NULL -19167 2230-12-22 20:25:39.000242111 +NULL -19167 2319-08-26 11:07:11.268 +NULL -20517 2233-12-20 04:06:56.666522799 +NULL -20517 2774-06-23 12:04:06.5 +NULL -20824 2478-11-05 00:28:05 +NULL -22422 1949-03-13 00:07:53.075 +NULL -22422 2337-07-19 06:33:02.000353352 +NULL -22422 2982-12-28 06:30:26.000883228 +NULL -23117 2037-01-05 21:52:30.685952759 +NULL -24775 2035-03-26 08:11:23.375224153 +NULL -24775 2920-08-06 15:58:28.261059449 +NULL -26998 2268-08-04 12:48:11.848006292 +NULL -26998 2428-12-26 07:53:45.96925825 +NULL -26998 2926-07-18 09:02:46.077 +NULL -29600 2333-11-02 15:06:30 +NULL -30059 2269-05-04 21:23:44.000339209 +NULL -30059 2420-12-10 22:12:30 +NULL -30059 2713-10-13 09:28:49 +NULL -30306 2619-05-24 10:35:58.000774018 +NULL -4279 2214-09-10 03:53:06 +NULL -4279 2470-08-12 11:21:14.000955747 +NULL -7373 2662-10-28 12:07:02.000526564 +NULL -7624 2219-12-03 17:07:19 +NULL -7624 2289-08-28 00:14:34 +NULL -7624 2623-03-20 03:18:45.00006465 +NULL -8087 2550-06-26 23:57:42.588007617 +NULL -8087 2923-07-02 11:40:26.115 +NULL -8435 2642-02-07 11:45:04.353231638 +NULL -8435 2834-12-06 16:38:18.901 +NULL -8624 2120-02-15 15:36:40.000758423 +NULL -8624 2282-03-28 07:58:16 +NULL -8624 2644-05-04 04:45:07.839 +NULL 10553 2168-05-05 21:10:59.000152113 +NULL 11232 2038-04-06 14:53:59 +NULL 11232 2507-01-27 22:04:22.49661421 +NULL 11232 2533-11-26 12:22:18 +NULL 13598 2421-05-20 14:18:31.000264698 +NULL 13598 2909-06-25 23:22:50 +NULL 14865 2079-10-06 16:54:35.117 +NULL 14865 2220-02-28 03:41:36 +NULL 14865 2943-03-21 00:42:10.505 +NULL 17125 2236-07-14 01:54:40.927230276 +NULL 17125 2629-11-15 15:34:52 +NULL 21181 2253-03-12 11:55:48.332 +NULL 21181 2434-02-20 00:46:29.633 +NULL 21436 2526-09-22 23:44:55 +NULL 21436 2696-05-08 05:19:24.112 +NULL 24870 2752-12-26 12:32:23.03685163 +NULL 2632 2561-12-15 15:42:27 +NULL 26484 1919-03-04 07:32:37.519 +NULL 26484 2953-03-10 02:05:26.508953676 +NULL 2748 2298-06-20 21:01:24 +NULL 2748 2759-02-13 18:04:36.000307355 +NULL 2748 2862-04-20 13:12:39.482805897 +NULL 29407 2385-12-14 06:03:39.597 +NULL 3198 2223-04-14 13:20:49 +NULL 3198 2428-06-13 16:21:33.955 +NULL 3198 2736-12-20 03:59:50.343550301 +NULL 4510 2293-01-17 13:47:41.00001006 +NULL 4510 2777-03-24 03:44:28.000169723 +NULL NULL 2124-05-07 15:01:19.021 +NULL NULL 2933-06-20 11:48:09.000839488 +NULL NULL 2971-08-07 12:02:11.000948152 +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN 
fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:b_string:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: int), b_string (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, b_string:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:s_decimal:decimal(38,18), 2:ROW__ID:struct] + Select Operator + expressions: key (type: int), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:decimal(38,18) + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(38,18)) + 
Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, s_decimal:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, decimal(38,18)] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: int) + 1 KEY.reducesinkkey0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:string + className: VectorMapJoinFullOuterLongOperator + fullOuterSmallTableKeyMapping: 0 -> 2 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:int, 1:string, 2:int, 3:decimal(38,18) + smallTableValueMapping: 3:decimal(38,18) + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string, 2:int, 3:decimal(38,18) + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: decimal(38,18)) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:string, VALUE._col1:int, VALUE._col2:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: 
decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +-1437463633 JU NULL NULL +-1437463633 NULL NULL NULL +-1437463633 SOWDWMS NULL NULL +-1437463633 TKTKGVGFW NULL NULL +-1437463633 YYXPPCH NULL NULL +1725068083 MKSCCE NULL NULL +1928928239 AMKTIWQ NULL NULL +1928928239 NULL NULL NULL +1928928239 NULL NULL NULL +1928928239 VAQHVRI NULL NULL +NULL ABBZ NULL NULL +NULL NULL -1093006502 -69.556658280000000000 +NULL NULL -1197550983 -0.558879692200000000 +NULL NULL -1197550983 0.100000000000000000 +NULL NULL -1197550983 71852.833867441261300000 +NULL NULL -1250662632 -544.554649000000000000 +NULL NULL -1250662632 5454127198.951479000000000000 +NULL NULL -1250662632 93104.000000000000000000 +NULL NULL -1264372462 -6993985240226.000000000000000000 +NULL NULL -1264372462 -899.000000000000000000 +NULL NULL -1264372462 0.883000000000000000 +NULL NULL -1490239076 92253.232096000000000000 +NULL NULL -1681455031 -11105.372477000000000000 +NULL NULL -1681455031 -6.454300000000000000 +NULL NULL -1740848088 -9.157000000000000000 +NULL NULL -1740848088 0.506394259000000000 +NULL NULL -1740848088 901.441000000000000000 +NULL NULL -2048404259 -0.322296044625100000 +NULL NULL -2048404259 3939387044.100000000000000000 +NULL NULL -2123273881 -55.891980000000000000 +NULL NULL -2123273881 3.959000000000000000 +NULL NULL -243940373 -583.258000000000000000 +NULL NULL -243940373 -97176129669.654953000000000000 +NULL NULL -369457052 560.119078830904550000 +NULL NULL -369457052 7.700000000000000000 +NULL NULL -424713789 0.480000000000000000 +NULL NULL -466171792 0.000000000000000000 +NULL NULL -466171792 4227.534400000000000000 +NULL NULL -466171792 69.900000000000000000 +NULL NULL -477147437 6.000000000000000000 +NULL NULL -793950320 -0.100000000000000000 +NULL NULL -793950320 -16.000000000000000000 +NULL NULL -934092157 -7843850349.571300380000000000 +NULL NULL -99948814 -38076694.398100000000000000 +NULL NULL -99948814 -96386.438000000000000000 +NULL NULL 1039864870 0.700000000000000000 +NULL NULL 1039864870 94.040000000000000000 +NULL NULL 1039864870 987601.570000000000000000 +NULL NULL 1091836730 -5017.140000000000000000 +NULL NULL 1091836730 0.020000000000000000 +NULL NULL 1242586043 
-4.000000000000000000 +NULL NULL 1242586043 -749975924224.630000000000000000 +NULL NULL 1242586043 71.148500000000000000 +NULL NULL 1479580778 92077343080.700000000000000000 +NULL NULL 150678276 -8278.000000000000000000 +NULL NULL 150678276 15989394.843600000000000000 +NULL NULL 1519948464 152.000000000000000000 +NULL NULL 1561921421 -5.405000000000000000 +NULL NULL 1561921421 53050.550000000000000000 +NULL NULL 1585021913 -5762331.066971120000000000 +NULL NULL 1585021913 607.227470000000000000 +NULL NULL 1585021913 745222.668089540000000000 +NULL NULL 1719049112 -7888197.000000000000000000 +NULL NULL 1738753776 -99817635066320.241600000000000000 +NULL NULL 1738753776 1525.280459649262000000 +NULL NULL 1755897735 -39.965207000000000000 +NULL NULL 1785750809 47443.115000000000000000 +NULL NULL 1801735854 -1760956929364.267000000000000000 +NULL NULL 1801735854 -438541294.700000000000000000 +NULL NULL 1816559437 -1035.700900000000000000 +NULL NULL 1909136587 -8610.078036935181000000 +NULL NULL 1909136587 181.076815359440000000 +NULL NULL 193709887 -0.566300000000000000 +NULL NULL 193709887 -19889.830000000000000000 +NULL NULL 193709887 0.800000000000000000 +NULL NULL 284554389 5.727146000000000000 +NULL NULL 294598722 -3542.600000000000000000 +NULL NULL 294598722 -9377326244.444000000000000000 +NULL NULL 448130683 -4302.485366846491000000 +NULL NULL 452719211 3020.293893074463600000 +NULL NULL 452719211 83003.437220000000000000 +NULL NULL 466567142 -58810.605860000000000000 +NULL NULL 466567142 -9763217822.129028000000000000 +NULL NULL 466567142 196.578529539858400000 +NULL NULL 560745412 678.250000000000000000 +NULL NULL 698032489 -330457.429262583900000000 +NULL NULL 891262439 -0.040000000000000000 +NULL NULL 90660785 -4564.517185000000000000 +NULL NULL 90660785 12590.288613000000000000 +NULL NULL NULL 1.089120893565337000 +NULL NULL NULL 4.261652270000000000 +NULL NULL NULL 682070836.264960300000000000 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:ROW__ID:struct] + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int + 
className: VectorMapJoinFullOuterLongOperator + fullOuterSmallTableKeyMapping: 0 -> 2 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:int, 2:int + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 5 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:int + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:ROW__ID:struct] + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + 
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int + className: VectorMapJoinFullOuterIntersectLongOperator + fullOuterSmallTableKeyMapping: 0 -> 1 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:int, 
1:int + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 5 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + fullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:int + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +-1780951928 NULL +-2038654700 -2038654700 +-670834064 NULL +-702028721 NULL +-702028721 NULL +-702028721 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +NULL -1003639073 +NULL -1014271154 +NULL -1036083124 +NULL -1210744742 +NULL -1323620496 +NULL -1379355738 +NULL -1712018127 +NULL -1792852276 +NULL -1912571616 +NULL -497171161 +NULL -683339273 +NULL -707688773 +NULL -747044796 +NULL -894799664 +NULL -932176731 +NULL 103640700 +NULL 1164387380 +NULL 1372592319 +NULL 1431997749 +NULL 1614287784 +NULL 162858059 +NULL 1635405412 +NULL 1685473722 +NULL 1780951928 +NULL 1825107160 +NULL 1831520491 +NULL 1840266070 +NULL 1997943409 +NULL 2119085509 +NULL 246169862 +NULL 260588085 +NULL 41376947 +NULL 436878811 +NULL 533298451 +NULL 670834064 +NULL 699007128 +NULL 699863556 +NULL NULL +NULL NULL +NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select 
Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:s_date:date, 2:s_timestamp:timestamp, 3:ROW__ID:struct] + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date, 2:timestamp + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:string, s_date:date, s_timestamp:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:string + 
partitionColumnCount: 0 + scratchColumnTypeNames: [string, bigint, timestamp] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: string) + 1 KEY.reducesinkkey0 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:string + className: VectorMapJoinFullOuterStringOperator + fullOuterSmallTableKeyMapping: 0 -> 1 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:string, 1:string, 2:date, 3:timestamp + smallTableValueMapping: 2:date, 3:timestamp + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string, 2:date, 3:timestamp + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, VALUE._col0:string, VALUE._col1:date, VALUE._col2:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a 
+#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 
1:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:s_date:date, 2:s_timestamp:timestamp, 3:ROW__ID:struct] + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date, 2:timestamp + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:string, s_date:date, s_timestamp:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + 
vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:string + partitionColumnCount: 0 + scratchColumnTypeNames: [string, bigint, timestamp] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: string) + 1 KEY.reducesinkkey0 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:string + className: VectorMapJoinFullOuterStringOperator + fullOuterSmallTableKeyMapping: 0 -> 1 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:string, 1:string, 2:date, 3:timestamp + smallTableValueMapping: 2:date, 3:timestamp + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string, 2:date, 3:timestamp + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, VALUE._col0:string, VALUE._col1:date, VALUE._col2:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: 
QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE 
Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:s_date:date, 2:s_timestamp:timestamp, 3:ROW__ID:struct] + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date, 2:timestamp + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:string, s_date:date, s_timestamp:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + 
reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:string + partitionColumnCount: 0 + scratchColumnTypeNames: [string, bigint, timestamp] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: string) + 1 KEY.reducesinkkey0 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:string + className: VectorMapJoinFullOuterStringOperator + fullOuterSmallTableKeyMapping: 0 -> 1 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:string, 1:string, 2:date, 3:timestamp + smallTableValueMapping: 2:date, 3:timestamp + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string, 2:date, 3:timestamp + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, VALUE._col0:string, VALUE._col1:date, VALUE._col2:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b 
FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE 
Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:s_date:date, 2:s_timestamp:timestamp, 3:ROW__ID:struct] + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date, 2:timestamp + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:string, s_date:date, s_timestamp:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + 
reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:string + partitionColumnCount: 0 + scratchColumnTypeNames: [string, bigint, timestamp] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: string) + 1 KEY.reducesinkkey0 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:string + className: VectorMapJoinFullOuterStringOperator + fullOuterSmallTableKeyMapping: 0 -> 1 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:string, 1:string, 2:date, 3:timestamp + smallTableValueMapping: 2:date, 3:timestamp + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string, 2:date, 3:timestamp + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, VALUE._col0:string, VALUE._col1:date, VALUE._col2:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM 
fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + 
Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct] + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key0:smallint, key1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct] + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key0:smallint, key1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez 
IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + 1 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:smallint, 1:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:smallint, 1:int + className: VectorMapJoinFullOuterMultiKeyOperator + fullOuterSmallTableKeyMapping: 0 -> 2, 1 -> 3 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:smallint, 1:int, 2:smallint, 3:int + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:smallint, 3:int + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int, VALUE._col0:smallint, VALUE._col1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + 
ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 
+NULL NULL NULL 1082230084 +NULL NULL NULL NULL +NULL NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct] + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key0:smallint, key1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct] + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 
0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key0:smallint, key1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + 1 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:smallint, 1:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:smallint, 1:int + className: VectorMapJoinFullOuterMultiKeyOperator + fullOuterSmallTableKeyMapping: 0 -> 2, 1 -> 3 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:smallint, 1:int, 2:smallint, 3:int + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:smallint, 3:int + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 
+ dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int, VALUE._col0:smallint, VALUE._col1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 
16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct] + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + 
usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key0:smallint, key1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct] + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key0:smallint, key1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + 1 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:smallint, 1:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:smallint, 1:int + className: VectorMapJoinFullOuterMultiKeyOperator + fullOuterSmallTableKeyMapping: 0 -> 2, 1 -> 3 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:smallint, 1:int, 2:smallint, 3:int + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce 
Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:smallint, 3:int + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int, VALUE._col0:smallint, VALUE._col1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 
+NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct] + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + 
projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key0:smallint, key1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct] + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key0:smallint, key1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Reduce Operator Tree: + Map Join Operator + condition 
map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + 1 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:smallint, 1:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:smallint, 1:int + className: VectorMapJoinFullOuterMultiKeyOperator + fullOuterSmallTableKeyMapping: 0 -> 2, 1 -> 3 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:smallint, 1:int, 2:smallint, 3:int + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:smallint, 3:int + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int, VALUE._col0:smallint, VALUE._col1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was 
here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, 
s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:timestamp, 1:key1:smallint, 2:key2:string, 3:ROW__ID:struct] + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:timestamp, 1:smallint, 2:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key0:timestamp, key1:smallint, key2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:timestamp, 1:key1:smallint, 2:key2:string, 3:s_decimal:decimal(38,18), 4:ROW__ID:struct] + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:timestamp, 
1:smallint, 2:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 3:decimal(38,18) + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(38,18)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + includeColumns: [0, 1, 2, 3] + dataColumns: key0:timestamp, key1:smallint, key2:string, s_decimal:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:timestamp, KEY.reducesinkkey1:smallint, KEY.reducesinkkey2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [timestamp, bigint, string, decimal(38,18)] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: string) + 1 KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:timestamp, 1:smallint, 2:string + bigTableRetainColumnNums: [0, 1, 2] + bigTableValueColumns: 0:timestamp, 1:smallint, 2:string + className: VectorMapJoinFullOuterMultiKeyOperator + fullOuterSmallTableKeyMapping: 0 -> 3, 1 -> 4, 2 -> 5 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Fast Hash Table and No Hybrid Hash Join IS true + projectedOutput: 0:timestamp, 1:smallint, 2:string, 3:timestamp, 4:smallint, 5:string, 6:decimal(38,18) + smallTableValueMapping: 6:decimal(38,18) + hashTableImplementationType: FAST + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + input vertices: + 1 Map 4 + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:timestamp, 1:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:string, 3:timestamp, 4:smallint, 5:string, 6:decimal(38,18) + Statistics: Num 
rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: timestamp), _col4 (type: smallint), _col5 (type: string), _col6 (type: decimal(38,18)) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 7 + dataColumns: KEY.reducesinkkey0:timestamp, KEY.reducesinkkey1:smallint, VALUE._col0:string, VALUE._col1:timestamp, VALUE._col2:smallint, VALUE._col3:string, VALUE._col4:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), VALUE._col0 (type: string), VALUE._col1 (type: timestamp), VALUE._col2 (type: smallint), VALUE._col3 (type: string), VALUE._col4 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b +PREHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b +POSTHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 21635 ANCO NULL NULL NULL NULL +2082-07-14 04:00:40.695380469 12556 NCYBDW NULL NULL NULL NULL +2093-04-10 23:36:54.846 1446 GHZVPWFO NULL NULL NULL NULL +2093-04-10 23:36:54.846 28996 Q NULL NULL NULL NULL +2093-04-10 23:36:54.846 NULL NULL NULL NULL NULL NULL +2188-06-04 15:03:14.963259704 9468 AAA 2188-06-04 15:03:14.963259704 9468 AAA 2.754963520000000000 +2299-11-15 16:41:30.401 -31077 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 -6909 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 1446 NULL NULL NULL NULL NULL +2608-02-23 23:44:02.546440891 26184 NCYBDW NULL NULL NULL NULL +2686-05-23 07:46:46.565832918 13212 NCYBDW 2686-05-23 07:46:46.565832918 13212 NCYBDW -917116793.400000000000000000 +2686-05-23 07:46:46.565832918 NULL GHZVPWFO NULL NULL NULL NULL +2898-10-01 
22:27:02.000871113 10361 NCYBDW NULL NULL NULL NULL +NULL -6909 NULL NULL NULL NULL NULL +NULL 21635 ANCO NULL NULL NULL NULL +NULL NULL CCWYD NULL NULL NULL NULL +NULL NULL NULL 1905-04-20 13:42:24.000469776 2638 KAUUFF 7.000000000000000000 +NULL NULL NULL 1919-06-20 00:16:50.611028595 20223 ZKBC -23.000000000000000000 +NULL NULL NULL 1931-12-04 11:13:47.269597392 23196 HVJCQMTQL -9697532.899400000000000000 +NULL NULL NULL 1941-10-16 02:19:35.000423663 -24459 AO -821445414.457971200000000000 +NULL NULL NULL 1957-02-01 14:00:28.000548421 -16085 ZVEUKC -2312.814900000000000000 +NULL NULL NULL 1957-03-06 09:57:31 -26373 NXLNNSO 2.000000000000000000 +NULL NULL NULL 1980-09-13 19:57:15 NULL M 57650.772300000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 7506645.953700000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 98790.713907420831000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -22419 LOTLS 342.372604022858400000 +NULL NULL NULL 2038-10-12 09:15:33.000539653 -19598 YKNIAJW -642807895924.660000000000000000 +NULL NULL NULL 2044-05-02 07:00:03.35 -8751 ZSMB -453797242.029791752000000000 +NULL NULL NULL 2071-07-21 20:02:32.000250697 2638 NRUV -66198.351092000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 5.000000000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 726945733.419300000000000000 +NULL NULL NULL 2075-10-25 20:32:40.000792874 NULL NULL 226612651968.360760000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR -394.086700000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR 67892053.023760940000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV -85184687349898.892000000000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 0.439686100000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 482.538341135921900000 +NULL NULL NULL 2105-01-04 16:27:45 23100 ZSMB -83.232800000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 NULL -9784.820000000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 UANGISEXR -5996.306000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -1515597428.000000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -4016.960800000000000000 +NULL NULL NULL 2201-07-05 17:22:06.084206844 -24459 UBGT 1.506948328200000000 +NULL NULL NULL 2238-05-17 19:27:25.519 20223 KQCM -0.010950000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ -0.261490000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ 37.728800000000000000 +NULL NULL NULL 2266-09-26 06:27:29.000284762 20223 EDYJJN 14.000000000000000000 +NULL NULL NULL 2301-06-03 17:16:19 15332 ZVEUKC 0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 -13125 JFYW 6.086657000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR 1279917802.420000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 12587 OPW -4.594895040000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T -0.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 2720.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 61.302000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G -4319470286240016.300000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G 975.000000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 30285 GSJPSIYOU 0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO -0.435500000000000000 +NULL NULL 
NULL 2304-12-15 15:31:16 8650 RLNO 0.713517473350000000 +NULL NULL NULL 2309-01-15 12:43:49 22821 ZMY 40.900000000000000000 +NULL NULL NULL 2332-06-14 07:02:42.32 -26373 XFFFDTQ 56845106806308.900000000000000000 +NULL NULL NULL 2333-07-28 09:59:26 23196 RKSK 37872288434740893.500000000000000000 +NULL NULL NULL 2338-02-12 09:30:07 20223 CTH -6154.763054000000000000 +NULL NULL NULL 2340-12-15 05:15:17.133588982 23663 HHTP 33383.800000000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 74179461.880493000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 92.150000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -32.460000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -472.000000000000000000 +NULL NULL NULL 2391-01-17 15:28:37.00045143 16160 ZVEUKC 771355639420297.133000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB -5151598.347000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB 0.767183260000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -162.950000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -9926693851.000000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR 0.400000000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR -769088.176482000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR 93262.914526611000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -9575827.553960000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -991.436050000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB 8694.890000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB -582687.000000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB 67.417990000000000000 +NULL NULL NULL 2467-05-11 06:04:13.426693647 23196 EIBSDASR -8.554888380100000000 +NULL NULL NULL 2480-10-02 09:31:37.000770961 -26373 NBN -5875.519725200000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ -49.512190000000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ 0.445800000000000000 +NULL NULL NULL 2512-10-06 03:03:03 13195 CRJ 14.000000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X -922.695158410700000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X 761196.522000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 24313 QBHUG -8423.151573236000000000 +NULL NULL NULL 2512-10-06 03:03:03 32099 ARNZ -0.410000000000000000 +NULL NULL NULL 2525-05-12 15:59:35 -24459 SAVRGA 53106747151.863300000000000000 +NULL NULL NULL 2535-03-01 05:04:49.000525883 23663 ALIQKNXHE -0.166569100000000000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 41.774515077866460000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 6.801285170800000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -12923 PPTJPFR 5.400000000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -17786 HYEGQ -84.169614329419000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 7362887891522.378200000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 749563668434009.650000000000000000 +NULL NULL NULL 2668-06-25 07:12:37.000970744 2638 TJE -2.779682700000000000 +NULL NULL NULL 2688-02-06 20:58:42.000947837 20223 PAIY 67661.735000000000000000 +NULL NULL NULL 2743-12-27 05:16:19.000573579 -12914 ZVEUKC -811984611.517849700000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 368.000000000000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 60.602579700000000000 +NULL NULL NULL 2808-07-09 02:10:11.928498854 -19598 FHFX 
0.300000000000000000 +NULL NULL NULL 2829-06-04 08:01:47.836 22771 ZVEUKC 94317.753180000000000000 +NULL NULL NULL 2861-05-27 07:13:01.000848622 -19598 WKPXNLXS 29399.000000000000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -4244.926206619000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -9951044.000000000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC -56082455.033918000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC 57.621752577880370000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 51.732330327300000000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 6370.000000000000000000 +NULL NULL NULL 2898-12-18 03:37:17 -24459 MHNBXPBM 14.236693562384810000 +NULL NULL NULL 2913-07-17 15:06:58.041 -10206 NULL -0.200000000000000000 +NULL NULL NULL 2938-12-21 23:35:59.498 29362 ZMY 0.880000000000000000 +NULL NULL NULL 2957-05-07 10:41:46 20223 OWQT -586953.153681000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF -96.300000000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF 2.157765900000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -18138 VDPN 8924831210.427680190000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -32485 AGEPWWLJF -48431309405.652522000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -375994644577.315257000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -81.000000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ 9.178000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 14500 WXLTRFQP -23.819800000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 6689 TFGVOGPJF -0.010000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -27394351.300000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -5.751278023000000000 +NULL NULL NULL NULL -12914 ZVEUKC 221.000000000000000000 +NULL NULL NULL NULL NULL NULL -2.400000000000000000 +NULL NULL NULL NULL NULL NULL -2207.300000000000000000 +NULL NULL NULL NULL NULL NULL NULL diff --git ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized.q.out ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized.q.out new file mode 100644 index 0000000..18fd4b4 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized.q.out @@ -0,0 +1,8988 @@ +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: 
default@fullouter_long_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: Lineage: fullouter_long_big_1a.key SIMPLE [(fullouter_long_big_1a_txt)fullouter_long_big_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: Lineage: fullouter_long_big_1a_nonull.key SIMPLE [(fullouter_long_big_1a_nonull_txt)fullouter_long_big_1a_nonull_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1a_txt +PREHOOK: query: CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: 
default@fullouter_long_small_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_small_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: Lineage: fullouter_long_small_1a.key SIMPLE [(fullouter_long_small_1a_txt)fullouter_long_small_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ] +POSTHOOK: Lineage: fullouter_long_small_1a.s_date SIMPLE [(fullouter_long_small_1a_txt)fullouter_long_small_1a_txt.FieldSchema(name:s_date, type:date, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_small_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_small_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_nonull +POSTHOOK: Lineage: fullouter_long_small_1a_nonull.key SIMPLE [(fullouter_long_small_1a_nonull_txt)fullouter_long_small_1a_nonull_txt.FieldSchema(name:key, type:bigint, comment:null), ] +POSTHOOK: Lineage: fullouter_long_small_1a_nonull.s_date SIMPLE [(fullouter_long_small_1a_nonull_txt)fullouter_long_small_1a_nonull_txt.FieldSchema(name:s_date, type:date, comment:null), ] +PREHOOK: query: analyze table fullouter_long_big_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: query: analyze table fullouter_long_big_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Output: default@fullouter_long_big_1a +PREHOOK: query: analyze table fullouter_long_big_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Output: default@fullouter_long_big_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1a compute statistics for columns +POSTHOOK: type: 
QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Output: default@fullouter_long_big_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +PREHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Output: default@fullouter_long_big_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a +PREHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: query: analyze table fullouter_long_small_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a +POSTHOOK: Output: default@fullouter_long_small_1a +PREHOOK: query: analyze table fullouter_long_small_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a +PREHOOK: Output: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a +POSTHOOK: Output: default@fullouter_long_small_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a_nonull +PREHOOK: Output: default@fullouter_long_small_1a_nonull +POSTHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +POSTHOOK: Output: default@fullouter_long_small_1a_nonull +PREHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a_nonull +PREHOOK: Output: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +POSTHOOK: Output: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE 
PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:ROW__ID:struct] + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:s_date:date, 2:ROW__ID:struct] + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:bigint, s_date:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + 
Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:bigint, VALUE._col0:bigint, VALUE._col1:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 
+NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:ROW__ID:struct] + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + Map Join Vectorization: + bigTableKeyColumns: 0:bigint + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:bigint + className: VectorMapJoinFullOuterLongOperator + fullOuterSmallTableKeyMapping: 0 -> 2 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:bigint, 2:bigint, 3:date + smallTableValueMapping: 3:date + 
hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:bigint, 3:date + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:s_date:date, 2:ROW__ID:struct] + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + 
value expressions: _col1 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:bigint, s_date:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:bigint, VALUE._col0:bigint, VALUE._col1:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + Map Join Vectorization: + bigTableKeyColumns: 0:bigint + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:bigint + className: VectorMapJoinFullOuterIntersectLongOperator + fullOuterSmallTableKeyMapping: 0 -> 1 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:bigint, 1:bigint, 2:date + smallTableValueMapping: 2:date + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + fullOuterIntersect: 
true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:bigint, 2:date + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 
8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY 
+POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 
2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +PREHOOK: query: CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1b +POSTHOOK: query: CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1b +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1b +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1b +PREHOOK: query: CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1b +POSTHOOK: query: CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1b +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1b +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b +POSTHOOK: type: LOAD 
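The four 1a result sets above (big and small inputs, each with and without NULL keys) pin down the NULL semantics of FULL OUTER MapJoin: a NULL join key matches nothing, so every NULL-keyed input row surfaces exactly once with NULLs projected for the opposite side, and dropping the NULL-keyed rows from one input removes only that side's NULL output rows. A minimal HiveQL sketch of the same check, run standalone rather than through the qtest harness (the SET is an assumption about standalone use; the plans above already report hive.vectorized.execution.enabled as met):

    SET hive.vectorized.execution.enabled=true;
    -- all non-matched rows, including the NULL-keyed ones:
    -- NULL = NULL is not TRUE, so a NULL key can never join
    SELECT b.key, s.key, s.s_date
    FROM fullouter_long_big_1a b
    FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key
    WHERE b.key IS NULL OR s.key IS NULL
    ORDER BY b.key;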
+#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1b +PREHOOK: query: analyze table fullouter_long_big_1b compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Output: default@fullouter_long_big_1b +POSTHOOK: query: analyze table fullouter_long_big_1b compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Output: default@fullouter_long_big_1b +PREHOOK: query: analyze table fullouter_long_big_1b compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Output: default@fullouter_long_big_1b +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1b compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Output: default@fullouter_long_big_1b +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1b compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1b +PREHOOK: Output: default@fullouter_long_small_1b +POSTHOOK: query: analyze table fullouter_long_small_1b compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1b +POSTHOOK: Output: default@fullouter_long_small_1b +PREHOOK: query: analyze table fullouter_long_small_1b compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1b +PREHOOK: Output: default@fullouter_long_small_1b +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1b compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1b +POSTHOOK: Output: default@fullouter_long_small_1b +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:smallint, 1:ROW__ID:struct] + Select Operator + expressions: key (type: smallint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint) + 1 _col0 (type: smallint) + Map Join Vectorization: + bigTableKeyColumns: 0:smallint + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:smallint + className: 
VectorMapJoinFullOuterLongOperator + fullOuterSmallTableKeyMapping: 0 -> 2 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:smallint, 2:smallint, 3:timestamp + smallTableValueMapping: 3:timestamp + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:smallint, 3:timestamp + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: smallint), _col2 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:smallint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, timestamp] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:smallint, 1:s_timestamp:timestamp, 2:ROW__ID:struct] + Select Operator + expressions: key (type: smallint), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:timestamp + Statistics: 
Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:timestamp + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:smallint, s_timestamp:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:smallint, VALUE._col0:smallint, VALUE._col1:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: smallint), VALUE._col1 (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:smallint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, timestamp] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint) + 1 _col0 (type: smallint) + Map Join Vectorization: + bigTableKeyColumns: 
0:smallint + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:smallint + className: VectorMapJoinFullOuterIntersectLongOperator + fullOuterSmallTableKeyMapping: 0 -> 1 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:smallint, 1:smallint, 2:timestamp + smallTableValueMapping: 2:timestamp + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + fullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:smallint, 2:timestamp + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: smallint), _col2 (type: timestamp) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +-25394 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +32030 32030 2101-09-09 07:35:05.145 +NULL -14172 1918-09-13 11:44:24.496926711 +NULL -14172 2355-01-14 23:23:34 +NULL -14172 2809-06-07 02:10:58 +NULL -15361 2219-09-15 20:15:03.000169887 +NULL -15361 2434-08-13 20:37:07.000172979 +NULL -15427 2023-11-09 19:31:21 +NULL -15427 2046-06-07 22:58:40.728 +NULL -15427 2355-01-08 12:34:11.617 +NULL -19167 2230-12-22 20:25:39.000242111 +NULL -19167 2319-08-26 11:07:11.268 +NULL -20517 2233-12-20 04:06:56.666522799 +NULL -20517 2774-06-23 12:04:06.5 +NULL -20824 2478-11-05 00:28:05 +NULL -22422 1949-03-13 00:07:53.075 +NULL -22422 2337-07-19 06:33:02.000353352 +NULL -22422 2982-12-28 06:30:26.000883228 +NULL -23117 2037-01-05 21:52:30.685952759 +NULL -24775 2035-03-26 08:11:23.375224153 +NULL -24775 2920-08-06 15:58:28.261059449 +NULL -26998 2268-08-04 12:48:11.848006292 +NULL -26998 2428-12-26 07:53:45.96925825 +NULL -26998 2926-07-18 09:02:46.077 +NULL -29600 2333-11-02 15:06:30 +NULL -30059 2269-05-04 21:23:44.000339209 +NULL -30059 2420-12-10 22:12:30 +NULL -30059 2713-10-13 09:28:49 +NULL -30306 2619-05-24 10:35:58.000774018 +NULL -4279 2214-09-10 03:53:06 +NULL -4279 2470-08-12 
11:21:14.000955747 +NULL -7373 2662-10-28 12:07:02.000526564 +NULL -7624 2219-12-03 17:07:19 +NULL -7624 2289-08-28 00:14:34 +NULL -7624 2623-03-20 03:18:45.00006465 +NULL -8087 2550-06-26 23:57:42.588007617 +NULL -8087 2923-07-02 11:40:26.115 +NULL -8435 2642-02-07 11:45:04.353231638 +NULL -8435 2834-12-06 16:38:18.901 +NULL -8624 2120-02-15 15:36:40.000758423 +NULL -8624 2282-03-28 07:58:16 +NULL -8624 2644-05-04 04:45:07.839 +NULL 10553 2168-05-05 21:10:59.000152113 +NULL 11232 2038-04-06 14:53:59 +NULL 11232 2507-01-27 22:04:22.49661421 +NULL 11232 2533-11-26 12:22:18 +NULL 13598 2421-05-20 14:18:31.000264698 +NULL 13598 2909-06-25 23:22:50 +NULL 14865 2079-10-06 16:54:35.117 +NULL 14865 2220-02-28 03:41:36 +NULL 14865 2943-03-21 00:42:10.505 +NULL 17125 2236-07-14 01:54:40.927230276 +NULL 17125 2629-11-15 15:34:52 +NULL 21181 2253-03-12 11:55:48.332 +NULL 21181 2434-02-20 00:46:29.633 +NULL 21436 2526-09-22 23:44:55 +NULL 21436 2696-05-08 05:19:24.112 +NULL 24870 2752-12-26 12:32:23.03685163 +NULL 2632 2561-12-15 15:42:27 +NULL 26484 1919-03-04 07:32:37.519 +NULL 26484 2953-03-10 02:05:26.508953676 +NULL 2748 2298-06-20 21:01:24 +NULL 2748 2759-02-13 18:04:36.000307355 +NULL 2748 2862-04-20 13:12:39.482805897 +NULL 29407 2385-12-14 06:03:39.597 +NULL 3198 2223-04-14 13:20:49 +NULL 3198 2428-06-13 16:21:33.955 +NULL 3198 2736-12-20 03:59:50.343550301 +NULL 4510 2293-01-17 13:47:41.00001006 +NULL 4510 2777-03-24 03:44:28.000169723 +NULL NULL 2124-05-07 15:01:19.021 +NULL NULL 2933-06-20 11:48:09.000839488 +NULL NULL 2971-08-07 12:02:11.000948152 +NULL NULL NULL +PREHOOK: query: CREATE TABLE fullouter_long_big_1c(key int, b_string string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1c +POSTHOOK: query: CREATE TABLE fullouter_long_big_1c(key int, b_string string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1c +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1c +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1c +PREHOOK: query: CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1c +POSTHOOK: query: CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1c +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1c +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1c +PREHOOK: query: analyze 
table fullouter_long_big_1c compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Output: default@fullouter_long_big_1c +POSTHOOK: query: analyze table fullouter_long_big_1c compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Output: default@fullouter_long_big_1c +PREHOOK: query: analyze table fullouter_long_big_1c compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Output: default@fullouter_long_big_1c +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1c compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Output: default@fullouter_long_big_1c +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1c compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1c +PREHOOK: Output: default@fullouter_long_small_1c +POSTHOOK: query: analyze table fullouter_long_small_1c compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1c +POSTHOOK: Output: default@fullouter_long_small_1c +PREHOOK: query: analyze table fullouter_long_small_1c compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1c +PREHOOK: Output: default@fullouter_long_small_1c +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1c compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1c +POSTHOOK: Output: default@fullouter_long_small_1c +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:b_string:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: int), b_string (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:string + className: VectorMapJoinFullOuterLongOperator + fullOuterSmallTableKeyMapping: 0 -> 3 + native: 
true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:int, 1:string, 3:int, 4:decimal(38,18) + smallTableValueMapping: 4:decimal(38,18) + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string, 3:int, 4:decimal(38,18) + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: decimal(38,18)) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, b_string:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, decimal(38,18)] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:s_decimal:decimal(38,18), 2:ROW__ID:struct] + Select Operator + expressions: key (type: int), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:decimal(38,18) 
+ Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(38,18)) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:decimal(38,18) + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(38,18)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, s_decimal:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:string, VALUE._col1:int, VALUE._col2:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, decimal(38,18)] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Map Join Operator + condition map: + Full Outer Join 
0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:string + className: VectorMapJoinFullOuterIntersectLongOperator + fullOuterSmallTableKeyMapping: 0 -> 2 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:int, 1:string, 2:int, 3:decimal(38,18) + smallTableValueMapping: 3:decimal(38,18) + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + fullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string, 2:int, 3:decimal(38,18) + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: decimal(38,18)) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +-1437463633 JU NULL NULL +-1437463633 NULL NULL NULL +-1437463633 SOWDWMS NULL NULL +-1437463633 TKTKGVGFW NULL NULL +-1437463633 YYXPPCH NULL NULL +1725068083 MKSCCE NULL NULL +1928928239 AMKTIWQ NULL NULL +1928928239 NULL NULL NULL +1928928239 NULL NULL NULL +1928928239 VAQHVRI NULL NULL +NULL ABBZ NULL NULL +NULL NULL -1093006502 -69.556658280000000000 +NULL NULL -1197550983 -0.558879692200000000 +NULL NULL -1197550983 0.100000000000000000 +NULL NULL -1197550983 71852.833867441261300000 +NULL NULL -1250662632 -544.554649000000000000 +NULL NULL -1250662632 5454127198.951479000000000000 +NULL NULL -1250662632 93104.000000000000000000 +NULL NULL -1264372462 -6993985240226.000000000000000000 +NULL NULL -1264372462 -899.000000000000000000 +NULL NULL -1264372462 0.883000000000000000 +NULL NULL -1490239076 92253.232096000000000000 +NULL NULL -1681455031 -11105.372477000000000000 +NULL NULL -1681455031 -6.454300000000000000 +NULL NULL -1740848088 -9.157000000000000000 +NULL NULL -1740848088 0.506394259000000000 +NULL NULL -1740848088 901.441000000000000000 +NULL NULL -2048404259 -0.322296044625100000 +NULL NULL -2048404259 3939387044.100000000000000000 
+NULL NULL -2123273881 -55.891980000000000000 +NULL NULL -2123273881 3.959000000000000000 +NULL NULL -243940373 -583.258000000000000000 +NULL NULL -243940373 -97176129669.654953000000000000 +NULL NULL -369457052 560.119078830904550000 +NULL NULL -369457052 7.700000000000000000 +NULL NULL -424713789 0.480000000000000000 +NULL NULL -466171792 0.000000000000000000 +NULL NULL -466171792 4227.534400000000000000 +NULL NULL -466171792 69.900000000000000000 +NULL NULL -477147437 6.000000000000000000 +NULL NULL -793950320 -0.100000000000000000 +NULL NULL -793950320 -16.000000000000000000 +NULL NULL -934092157 -7843850349.571300380000000000 +NULL NULL -99948814 -38076694.398100000000000000 +NULL NULL -99948814 -96386.438000000000000000 +NULL NULL 1039864870 0.700000000000000000 +NULL NULL 1039864870 94.040000000000000000 +NULL NULL 1039864870 987601.570000000000000000 +NULL NULL 1091836730 -5017.140000000000000000 +NULL NULL 1091836730 0.020000000000000000 +NULL NULL 1242586043 -4.000000000000000000 +NULL NULL 1242586043 -749975924224.630000000000000000 +NULL NULL 1242586043 71.148500000000000000 +NULL NULL 1479580778 92077343080.700000000000000000 +NULL NULL 150678276 -8278.000000000000000000 +NULL NULL 150678276 15989394.843600000000000000 +NULL NULL 1519948464 152.000000000000000000 +NULL NULL 1561921421 -5.405000000000000000 +NULL NULL 1561921421 53050.550000000000000000 +NULL NULL 1585021913 -5762331.066971120000000000 +NULL NULL 1585021913 607.227470000000000000 +NULL NULL 1585021913 745222.668089540000000000 +NULL NULL 1719049112 -7888197.000000000000000000 +NULL NULL 1738753776 -99817635066320.241600000000000000 +NULL NULL 1738753776 1525.280459649262000000 +NULL NULL 1755897735 -39.965207000000000000 +NULL NULL 1785750809 47443.115000000000000000 +NULL NULL 1801735854 -1760956929364.267000000000000000 +NULL NULL 1801735854 -438541294.700000000000000000 +NULL NULL 1816559437 -1035.700900000000000000 +NULL NULL 1909136587 -8610.078036935181000000 +NULL NULL 1909136587 181.076815359440000000 +NULL NULL 193709887 -0.566300000000000000 +NULL NULL 193709887 -19889.830000000000000000 +NULL NULL 193709887 0.800000000000000000 +NULL NULL 284554389 5.727146000000000000 +NULL NULL 294598722 -3542.600000000000000000 +NULL NULL 294598722 -9377326244.444000000000000000 +NULL NULL 448130683 -4302.485366846491000000 +NULL NULL 452719211 3020.293893074463600000 +NULL NULL 452719211 83003.437220000000000000 +NULL NULL 466567142 -58810.605860000000000000 +NULL NULL 466567142 -9763217822.129028000000000000 +NULL NULL 466567142 196.578529539858400000 +NULL NULL 560745412 678.250000000000000000 +NULL NULL 698032489 -330457.429262583900000000 +NULL NULL 891262439 -0.040000000000000000 +NULL NULL 90660785 -4564.517185000000000000 +NULL NULL 90660785 12590.288613000000000000 +NULL NULL NULL 1.089120893565337000 +NULL NULL NULL 4.261652270000000000 +NULL NULL NULL 682070836.264960300000000000 +PREHOOK: query: CREATE TABLE fullouter_long_big_1d(key int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1d +POSTHOOK: query: CREATE TABLE fullouter_long_big_1d(key int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1d +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d +PREHOOK: type: LOAD +#### A masked pattern was here #### 
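Every EXPLAIN in this file shows the same vectorized FULL OUTER MapJoin shape: Map 1 joins the big table against the broadcast small table (Map 5) with a VectorMapJoinFullOuter*Operator, Reducer 4 makes a second pass over the same broadcast input with fullOuterIntersect: true using a VectorMapJoinFullOuterIntersect*Operator, and Union 2 merges the two streams ahead of the final sort in Reducer 3. Reading the intersect pass as the producer of small-table-only rows is an interpretation of the plan, not something the output states. To reproduce a plan like the 1c one above, something along these lines should work; the SETs mirror the nativeConditionsMet lines in the plans, and hive.mapjoin.full.outer is presumably the feature switch this patch adds:

    SET hive.execution.engine=tez;
    SET hive.vectorized.execution.enabled=true;
    SET hive.vectorized.execution.mapjoin.native.enabled=true;
    SET hive.mapjoin.optimized.hashtable=true;
    SET hive.mapjoin.full.outer=true;  -- assumption: the new feature flag
    EXPLAIN VECTORIZATION DETAIL
    SELECT b.key, b.b_string, s.key, s.s_decimal
    FROM fullouter_long_big_1c b
    FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key
    ORDER BY b.key;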
+PREHOOK: Output: default@fullouter_long_big_1d +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1d +PREHOOK: query: CREATE TABLE fullouter_long_small_1d(key int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1d +POSTHOOK: query: CREATE TABLE fullouter_long_small_1d(key int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1d +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1d +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1d +PREHOOK: query: analyze table fullouter_long_big_1d compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Output: default@fullouter_long_big_1d +POSTHOOK: query: analyze table fullouter_long_big_1d compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Output: default@fullouter_long_big_1d +PREHOOK: query: analyze table fullouter_long_big_1d compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Output: default@fullouter_long_big_1d +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1d compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Output: default@fullouter_long_big_1d +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1d compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1d +PREHOOK: Output: default@fullouter_long_small_1d +POSTHOOK: query: analyze table fullouter_long_small_1d compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1d +POSTHOOK: Output: default@fullouter_long_small_1d +PREHOOK: query: analyze table fullouter_long_small_1d compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1d +PREHOOK: Output: default@fullouter_long_small_1d +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1d compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1d +POSTHOOK: Output: default@fullouter_long_small_1d +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE 
PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:ROW__ID:struct] + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int + className: VectorMapJoinFullOuterLongOperator + fullOuterSmallTableKeyMapping: 0 -> 2 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:int, 2:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 5 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:int + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:ROW__ID:struct] + 
Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + 
dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int + className: VectorMapJoinFullOuterIntersectLongOperator + fullOuterSmallTableKeyMapping: 0 -> 1 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:int, 1:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 5 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + fullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:int + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +-1780951928 NULL +-2038654700 -2038654700 +-670834064 NULL +-702028721 NULL +-702028721 NULL +-702028721 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +NULL -1003639073 +NULL -1014271154 +NULL -1036083124 +NULL -1210744742 +NULL -1323620496 +NULL -1379355738 +NULL -1712018127 +NULL -1792852276 +NULL -1912571616 +NULL -497171161 +NULL -683339273 +NULL -707688773 +NULL -747044796 +NULL -894799664 +NULL -932176731 +NULL 103640700 +NULL 1164387380 +NULL 1372592319 +NULL 1431997749 +NULL 1614287784 +NULL 162858059 +NULL 1635405412 +NULL 1685473722 +NULL 1780951928 +NULL 1825107160 +NULL 1831520491 +NULL 1840266070 +NULL 1997943409 +NULL 2119085509 +NULL 246169862 +NULL 260588085 +NULL 41376947 +NULL 436878811 +NULL 533298451 +NULL 670834064 +NULL 699007128 +NULL 699863556 +NULL NULL +NULL NULL +NULL NULL +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE 
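The multikey tables that start here differ from the long-key tables above in one respect: each is loaded into a delimited text staging table (*_txt) and then rewritten as ORC via CTAS, so the multikey joins run against ORC input. As before, both basic and column statistics are gathered; these feed the Num rows / Data size estimates visible in the plans and help the optimizer pick the broadcast side. Condensed from the audit blocks, the per-table pattern is:

    CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int)
    ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';
    LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a.txt'
    OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt;
    -- rewrite the text staging table as ORC
    CREATE TABLE fullouter_multikey_big_1a STORED AS ORC
    AS SELECT * FROM fullouter_multikey_big_1a_txt;
    -- stats drive the small-table (broadcast) choice in the MapJoin plans
    ANALYZE TABLE fullouter_multikey_big_1a COMPUTE STATISTICS;
    ANALYZE TABLE fullouter_multikey_big_1a COMPUTE STATISTICS FOR COLUMNS;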
+PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a +POSTHOOK: Lineage: fullouter_multikey_big_1a.key0 SIMPLE [(fullouter_multikey_big_1a_txt)fullouter_multikey_big_1a_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1a.key1 SIMPLE [(fullouter_multikey_big_1a_txt)fullouter_multikey_big_1a_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull +POSTHOOK: 
Lineage: fullouter_multikey_big_1a_nonull.key0 SIMPLE [(fullouter_multikey_big_1a_nonull_txt)fullouter_multikey_big_1a_nonull_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1a_nonull.key1 SIMPLE [(fullouter_multikey_big_1a_nonull_txt)fullouter_multikey_big_1a_nonull_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_small_1a_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_small_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_small_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a +POSTHOOK: Lineage: fullouter_multikey_small_1a.key0 SIMPLE [(fullouter_multikey_small_1a_txt)fullouter_multikey_small_1a_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1a.key1 SIMPLE [(fullouter_multikey_small_1a_txt)fullouter_multikey_small_1a_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +PREHOOK: query: CREATE TABLE 
fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull +POSTHOOK: Lineage: fullouter_multikey_small_1a_nonull.key0 SIMPLE [(fullouter_multikey_small_1a_nonull_txt)fullouter_multikey_small_1a_nonull_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1a_nonull.key1 SIMPLE [(fullouter_multikey_small_1a_nonull_txt)fullouter_multikey_small_1a_nonull_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: analyze table fullouter_multikey_big_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Output: default@fullouter_multikey_big_1a +POSTHOOK: query: analyze table fullouter_multikey_big_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Output: default@fullouter_multikey_big_1a +PREHOOK: query: analyze table fullouter_multikey_big_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Output: default@fullouter_multikey_big_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_big_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Output: default@fullouter_multikey_big_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull +POSTHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull +PREHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_small_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a +PREHOOK: Output: default@fullouter_multikey_small_1a +POSTHOOK: query: analyze table fullouter_multikey_small_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1a +POSTHOOK: Output: default@fullouter_multikey_small_1a +PREHOOK: query: analyze table fullouter_multikey_small_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a +PREHOOK: Output: 
default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_small_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1a +POSTHOOK: Output: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull +POSTHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull +PREHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct] + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:smallint, 1:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:smallint, 1:int + className: VectorMapJoinFullOuterMultiKeyOperator + fullOuterSmallTableKeyMapping: 0 -> 3, 1 -> 4 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small 
table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:smallint, 1:int, 3:smallint, 4:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 3:smallint, 4:int + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key0:smallint, key1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct] + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key0:smallint, key1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int, VALUE._col0:smallint, VALUE._col1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:smallint, 1:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:smallint, 1:int + className: VectorMapJoinFullOuterIntersectMultiKeyOperator + fullOuterSmallTableKeyMapping: 0 -> 2, 1 -> 3 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin 
Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:smallint, 1:int, 2:smallint, 3:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + fullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:smallint, 3:int + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 
12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +NULL NULL NULL NULL +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 
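+-- The NULL NULL rows in these result sets are small-table rows with no big-table
+-- match: a NULL join key never compares equal to anything, so NULL-keyed rows on
+-- either side can only surface as FULL OUTER non-matches, never as matches. A
+-- minimal sketch of that semantics, assuming a scratch table t that is not part
+-- of this test:
+--   CREATE TABLE t (k INT);
+--   INSERT INTO t VALUES (NULL), (1);
+--   SELECT a.k, b.k FROM t a FULL OUTER JOIN t b ON a.k = b.k;
+-- This yields (1, 1) once, plus one (NULL, NULL) row for each side's NULL-keyed
+-- row, because neither NULL key joins to anything.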
+NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 
356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL NULL +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 
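+-- As the operator names in the plan above suggest, the vectorized FULL OUTER
+-- MapJoin appears to run as two passes merged by Union 2: Map 1's
+-- VectorMapJoinFullOuterMultiKeyOperator streams the big table against the
+-- broadcast hash table and emits matches plus big-table non-matches, while
+-- Reducer 4's VectorMapJoinFullOuterIntersectMultiKeyOperator
+-- (fullOuterIntersect: true) re-joins the big-table keys against the same
+-- broadcast input so the small-table non-matches can be emitted. Conceptually
+-- (a sketch of the decomposition, not the engine's literal rewrite):
+--   SELECT b.key0, b.key1, s.key0, s.key1
+--   FROM fullouter_multikey_big_1a b
+--   LEFT OUTER JOIN fullouter_multikey_small_1a s
+--     ON b.key0 = s.key0 AND b.key1 = s.key1
+--   UNION ALL
+--   SELECT NULL, NULL, s.key0, s.key1
+--   FROM fullouter_multikey_small_1a s
+--   WHERE NOT EXISTS (SELECT 1 FROM fullouter_multikey_big_1a b
+--                     WHERE b.key0 = s.key0 AND b.key1 = s.key1);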
+NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_big_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1b +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_big_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1b +POSTHOOK: Lineage: fullouter_multikey_big_1b.key0 SIMPLE [(fullouter_multikey_big_1b_txt)fullouter_multikey_big_1b_txt.FieldSchema(name:key0, type:timestamp, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1b.key1 SIMPLE [(fullouter_multikey_big_1b_txt)fullouter_multikey_big_1b_txt.FieldSchema(name:key1, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1b.key2 SIMPLE [(fullouter_multikey_big_1b_txt)fullouter_multikey_big_1b_txt.FieldSchema(name:key2, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1b_txt(key0 
timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_small_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1b +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_small_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1b +POSTHOOK: Lineage: fullouter_multikey_small_1b.key0 SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:key0, type:timestamp, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1b.key1 SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:key1, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1b.key2 SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:key2, type:string, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1b.s_decimal SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:s_decimal, type:decimal(38,18), comment:null), ] +PREHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b_txt +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +POSTHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b_txt +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +PREHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b_txt +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b_txt +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1b_txt +PREHOOK: Output: 
default@fullouter_multikey_small_1b_txt +POSTHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1b_txt +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +PREHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1b_txt +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1b_txt +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:timestamp, 1:key1:smallint, 2:key2:string, 3:ROW__ID:struct] + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + 1 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:timestamp, 1:smallint, 2:string + bigTableRetainColumnNums: [0, 1, 2] + bigTableValueColumns: 0:timestamp, 1:smallint, 2:string + className: VectorMapJoinFullOuterMultiKeyOperator + fullOuterSmallTableKeyMapping: 0 -> 4, 1 -> 5, 2 -> 6 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:timestamp, 1:smallint, 2:string, 4:timestamp, 5:smallint, 6:string, 7:decimal(38,18) + smallTableValueMapping: 7:decimal(38,18) + hashTableImplementationType: OPTIMIZED 
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + input vertices: + 1 Map 5 + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:timestamp, 1:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:string, 4:timestamp, 5:smallint, 6:string, 7:decimal(38,18) + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: timestamp), _col4 (type: smallint), _col5 (type: string), _col6 (type: decimal(38,18)) + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:timestamp, 1:smallint, 2:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key0:timestamp, key1:smallint, key2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [timestamp, bigint, string, decimal(38,18)] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:timestamp, 1:key1:smallint, 2:key2:string, 3:s_decimal:decimal(38,18), 4:ROW__ID:struct] + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:timestamp, 1:smallint, 2:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 3:decimal(38,18) + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + value expressions: 
_col3 (type: decimal(38,18)) + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:timestamp, 1:smallint, 2:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 3:decimal(38,18) + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(38,18)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + includeColumns: [0, 1, 2, 3] + dataColumns: key0:timestamp, key1:smallint, key2:string, s_decimal:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 7 + dataColumns: KEY.reducesinkkey0:timestamp, KEY.reducesinkkey1:smallint, VALUE._col0:string, VALUE._col1:timestamp, VALUE._col2:smallint, VALUE._col3:string, VALUE._col4:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), VALUE._col0 (type: string), VALUE._col1 (type: timestamp), VALUE._col2 (type: smallint), VALUE._col3 (type: string), VALUE._col4 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:timestamp, KEY.reducesinkkey1:smallint, KEY.reducesinkkey2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [timestamp, bigint, string, decimal(38,18)] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: 
timestamp), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + 1 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:timestamp, 1:smallint, 2:string + bigTableRetainColumnNums: [0, 1, 2] + bigTableValueColumns: 0:timestamp, 1:smallint, 2:string + className: VectorMapJoinFullOuterIntersectMultiKeyOperator + fullOuterSmallTableKeyMapping: 0 -> 3, 1 -> 4, 2 -> 5 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:timestamp, 1:smallint, 2:string, 3:timestamp, 4:smallint, 5:string, 6:decimal(38,18) + smallTableValueMapping: 6:decimal(38,18) + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + input vertices: + 1 Map 5 + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + fullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:timestamp, 1:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:string, 3:timestamp, 4:smallint, 5:string, 6:decimal(38,18) + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: timestamp), _col4 (type: smallint), _col5 (type: string), _col6 (type: decimal(38,18)) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b +PREHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b +POSTHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 21635 ANCO NULL NULL NULL NULL +2082-07-14 04:00:40.695380469 12556 NCYBDW NULL NULL NULL NULL +2093-04-10 23:36:54.846 1446 GHZVPWFO NULL NULL NULL NULL +2093-04-10 23:36:54.846 28996 Q NULL NULL NULL NULL +2093-04-10 23:36:54.846 NULL NULL NULL NULL NULL NULL +2188-06-04 15:03:14.963259704 
9468 AAA 2188-06-04 15:03:14.963259704 9468 AAA 2.754963520000000000 +2299-11-15 16:41:30.401 -31077 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 -6909 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 1446 NULL NULL NULL NULL NULL +2608-02-23 23:44:02.546440891 26184 NCYBDW NULL NULL NULL NULL +2686-05-23 07:46:46.565832918 13212 NCYBDW 2686-05-23 07:46:46.565832918 13212 NCYBDW -917116793.400000000000000000 +2686-05-23 07:46:46.565832918 NULL GHZVPWFO NULL NULL NULL NULL +2898-10-01 22:27:02.000871113 10361 NCYBDW NULL NULL NULL NULL +NULL -6909 NULL NULL NULL NULL NULL +NULL 21635 ANCO NULL NULL NULL NULL +NULL NULL CCWYD NULL NULL NULL NULL +NULL NULL NULL 1905-04-20 13:42:24.000469776 2638 KAUUFF 7.000000000000000000 +NULL NULL NULL 1919-06-20 00:16:50.611028595 20223 ZKBC -23.000000000000000000 +NULL NULL NULL 1931-12-04 11:13:47.269597392 23196 HVJCQMTQL -9697532.899400000000000000 +NULL NULL NULL 1941-10-16 02:19:35.000423663 -24459 AO -821445414.457971200000000000 +NULL NULL NULL 1957-02-01 14:00:28.000548421 -16085 ZVEUKC -2312.814900000000000000 +NULL NULL NULL 1957-03-06 09:57:31 -26373 NXLNNSO 2.000000000000000000 +NULL NULL NULL 1980-09-13 19:57:15 NULL M 57650.772300000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 7506645.953700000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 98790.713907420831000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -22419 LOTLS 342.372604022858400000 +NULL NULL NULL 2038-10-12 09:15:33.000539653 -19598 YKNIAJW -642807895924.660000000000000000 +NULL NULL NULL 2044-05-02 07:00:03.35 -8751 ZSMB -453797242.029791752000000000 +NULL NULL NULL 2071-07-21 20:02:32.000250697 2638 NRUV -66198.351092000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 5.000000000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 726945733.419300000000000000 +NULL NULL NULL 2075-10-25 20:32:40.000792874 NULL NULL 226612651968.360760000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR -394.086700000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR 67892053.023760940000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV -85184687349898.892000000000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 0.439686100000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 482.538341135921900000 +NULL NULL NULL 2105-01-04 16:27:45 23100 ZSMB -83.232800000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 NULL -9784.820000000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 UANGISEXR -5996.306000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -1515597428.000000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -4016.960800000000000000 +NULL NULL NULL 2201-07-05 17:22:06.084206844 -24459 UBGT 1.506948328200000000 +NULL NULL NULL 2238-05-17 19:27:25.519 20223 KQCM -0.010950000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ -0.261490000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ 37.728800000000000000 +NULL NULL NULL 2266-09-26 06:27:29.000284762 20223 EDYJJN 14.000000000000000000 +NULL NULL NULL 2301-06-03 17:16:19 15332 ZVEUKC 0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 -13125 JFYW 6.086657000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR 1279917802.420000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 12587 OPW 
-4.594895040000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T -0.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 2720.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 61.302000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G -4319470286240016.300000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G 975.000000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 30285 GSJPSIYOU 0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO -0.435500000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO 0.713517473350000000 +NULL NULL NULL 2309-01-15 12:43:49 22821 ZMY 40.900000000000000000 +NULL NULL NULL 2332-06-14 07:02:42.32 -26373 XFFFDTQ 56845106806308.900000000000000000 +NULL NULL NULL 2333-07-28 09:59:26 23196 RKSK 37872288434740893.500000000000000000 +NULL NULL NULL 2338-02-12 09:30:07 20223 CTH -6154.763054000000000000 +NULL NULL NULL 2340-12-15 05:15:17.133588982 23663 HHTP 33383.800000000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 74179461.880493000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 92.150000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -32.460000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -472.000000000000000000 +NULL NULL NULL 2391-01-17 15:28:37.00045143 16160 ZVEUKC 771355639420297.133000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB -5151598.347000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB 0.767183260000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -162.950000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -9926693851.000000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR 0.400000000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR -769088.176482000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR 93262.914526611000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -9575827.553960000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -991.436050000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB 8694.890000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB -582687.000000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB 67.417990000000000000 +NULL NULL NULL 2467-05-11 06:04:13.426693647 23196 EIBSDASR -8.554888380100000000 +NULL NULL NULL 2480-10-02 09:31:37.000770961 -26373 NBN -5875.519725200000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ -49.512190000000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ 0.445800000000000000 +NULL NULL NULL 2512-10-06 03:03:03 13195 CRJ 14.000000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X -922.695158410700000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X 761196.522000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 24313 QBHUG -8423.151573236000000000 +NULL NULL NULL 2512-10-06 03:03:03 32099 ARNZ -0.410000000000000000 +NULL NULL NULL 2525-05-12 15:59:35 -24459 SAVRGA 53106747151.863300000000000000 +NULL NULL NULL 2535-03-01 05:04:49.000525883 23663 ALIQKNXHE -0.166569100000000000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 41.774515077866460000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 6.801285170800000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -12923 PPTJPFR 5.400000000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -17786 HYEGQ -84.169614329419000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 7362887891522.378200000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 
CXTI 749563668434009.650000000000000000 +NULL NULL NULL 2668-06-25 07:12:37.000970744 2638 TJE -2.779682700000000000 +NULL NULL NULL 2688-02-06 20:58:42.000947837 20223 PAIY 67661.735000000000000000 +NULL NULL NULL 2743-12-27 05:16:19.000573579 -12914 ZVEUKC -811984611.517849700000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 368.000000000000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 60.602579700000000000 +NULL NULL NULL 2808-07-09 02:10:11.928498854 -19598 FHFX 0.300000000000000000 +NULL NULL NULL 2829-06-04 08:01:47.836 22771 ZVEUKC 94317.753180000000000000 +NULL NULL NULL 2861-05-27 07:13:01.000848622 -19598 WKPXNLXS 29399.000000000000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -4244.926206619000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -9951044.000000000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC -56082455.033918000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC 57.621752577880370000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 51.732330327300000000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 6370.000000000000000000 +NULL NULL NULL 2898-12-18 03:37:17 -24459 MHNBXPBM 14.236693562384810000 +NULL NULL NULL 2913-07-17 15:06:58.041 -10206 NULL -0.200000000000000000 +NULL NULL NULL 2938-12-21 23:35:59.498 29362 ZMY 0.880000000000000000 +NULL NULL NULL 2957-05-07 10:41:46 20223 OWQT -586953.153681000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF -96.300000000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF 2.157765900000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -18138 VDPN 8924831210.427680190000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -32485 AGEPWWLJF -48431309405.652522000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -375994644577.315257000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -81.000000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ 9.178000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 14500 WXLTRFQP -23.819800000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 6689 TFGVOGPJF -0.010000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -27394351.300000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -5.751278023000000000 +NULL NULL NULL NULL -12914 ZVEUKC 221.000000000000000000 +NULL NULL NULL NULL NULL NULL -2.400000000000000000 +NULL NULL NULL NULL NULL NULL -2207.300000000000000000 +NULL NULL NULL NULL NULL NULL NULL +PREHOOK: query: CREATE TABLE fullouter_string_big_1a_txt(key string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a_txt(key string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: 
default@fullouter_string_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a +POSTHOOK: Lineage: fullouter_string_big_1a.key SIMPLE [(fullouter_string_big_1a_txt)fullouter_string_big_1a_txt.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull_txt(key string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull_txt(key string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a_nonull +POSTHOOK: Lineage: fullouter_string_big_1a_nonull.key SIMPLE [(fullouter_string_big_1a_nonull_txt)fullouter_string_big_1a_nonull_txt.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' 
OVERWRITE INTO TABLE fullouter_string_small_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_small_1a_txt +PREHOOK: query: CREATE TABLE fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_small_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_small_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a +POSTHOOK: Lineage: fullouter_string_small_1a.key SIMPLE [(fullouter_string_small_1a_txt)fullouter_string_small_1a_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a.s_date SIMPLE [(fullouter_string_small_1a_txt)fullouter_string_small_1a_txt.FieldSchema(name:s_date, type:date, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a.s_timestamp SIMPLE [(fullouter_string_small_1a_txt)fullouter_string_small_1a_txt.FieldSchema(name:s_timestamp, type:timestamp, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_small_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_small_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_small_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_small_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a_nonull +POSTHOOK: Lineage: fullouter_string_small_1a_nonull.key SIMPLE [(fullouter_string_small_1a_nonull_txt)fullouter_string_small_1a_nonull_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a_nonull.s_date SIMPLE [(fullouter_string_small_1a_nonull_txt)fullouter_string_small_1a_nonull_txt.FieldSchema(name:s_date, type:date, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a_nonull.s_timestamp SIMPLE 
[(fullouter_string_small_1a_nonull_txt)fullouter_string_small_1a_nonull_txt.FieldSchema(name:s_timestamp, type:timestamp, comment:null), ] +PREHOOK: query: analyze table fullouter_string_big_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Output: default@fullouter_string_big_1a +POSTHOOK: query: analyze table fullouter_string_big_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Output: default@fullouter_string_big_1a +PREHOOK: query: analyze table fullouter_string_big_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Output: default@fullouter_string_big_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_big_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Output: default@fullouter_string_big_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Output: default@fullouter_string_big_1a_nonull +POSTHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Output: default@fullouter_string_big_1a_nonull +PREHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Output: default@fullouter_string_big_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Output: default@fullouter_string_big_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_string_small_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a +PREHOOK: Output: default@fullouter_string_small_1a +POSTHOOK: query: analyze table fullouter_string_small_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a +POSTHOOK: Output: default@fullouter_string_small_1a +PREHOOK: query: analyze table fullouter_string_small_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a +PREHOOK: Output: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_small_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a +POSTHOOK: Output: default@fullouter_string_small_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a_nonull +PREHOOK: Output: default@fullouter_string_small_1a_nonull +POSTHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +POSTHOOK: Output: default@fullouter_string_small_1a_nonull +PREHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a_nonull +PREHOOK: Output: 
default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +POSTHOOK: Output: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:string + className: VectorMapJoinFullOuterStringOperator + fullOuterSmallTableKeyMapping: 0 -> 2 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:string, 2:string, 3:date, 4:timestamp + smallTableValueMapping: 3:date, 4:timestamp + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:string, 3:date, 4:timestamp + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink 
Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:string + partitionColumnCount: 0 + scratchColumnTypeNames: [string, bigint, timestamp] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:s_date:date, 2:s_timestamp:timestamp, 3:ROW__ID:struct] + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date, 2:timestamp + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date, 2:timestamp + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:string, s_date:date, s_timestamp:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, VALUE._col0:string, VALUE._col1:date, VALUE._col2:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:string + partitionColumnCount: 0 + scratchColumnTypeNames: [string, bigint, timestamp] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:string + className: VectorMapJoinFullOuterIntersectStringOperator + fullOuterSmallTableKeyMapping: 0 -> 1 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:string, 1:string, 2:date, 3:timestamp + smallTableValueMapping: 2:date, 3:timestamp + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + fullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string, 2:date, 3:timestamp + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 
(type: string), _col2 (type: date), _col3 (type: timestamp)
+ Union 2
+ Vertex: Union 2
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key
+order by b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fullouter_string_big_1a
+PREHOOK: Input: default@fullouter_string_small_1a
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key
+order by b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fullouter_string_big_1a
+POSTHOOK: Input: default@fullouter_string_small_1a
+#### A masked pattern was here ####
+FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556
+MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592
+NULL 1985-01-22 2111-01-10 15:44:28
+NULL 2021-02-21 2802-04-21 18:48:18.5933838
+NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13
+NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35
+NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799
+NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558
+NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692
+NULL BEP 2141-02-19 2521-06-09 01:20:07.121
+NULL BEP 2206-08-10 2331-10-09 10:59:51
+NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094
+NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82
+NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647
+NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048
+NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738
+NULL FYW 1807-03-20 2305-08-17 01:32:44
+NULL GOYJHW 1959-04-27 NULL
+NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302
+NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784
+NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251
+NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888
+NULL IWEZJHKE NULL NULL
+NULL KL 1980-09-22 2073-08-25 11:51:10.318
+NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454
+NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055
+NULL LOTLS 2126-09-16 1977-12-15 15:28:56
+NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922
+NULL NULL 1865-11-08 2893-04-07 07:36:12
+NULL NULL 1915-02-22 2554-10-27 09:34:30
+NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801
+NULL NULL NULL NULL
+NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266
+NULL SDA 2196-04-12 2462-10-26 19:28:12.733
+NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287
+NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887
+NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59
+NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498
+NULL ZNOUDCR NULL 1988-04-23 08:40:21
+PXLD NULL NULL NULL
+PXLD NULL NULL NULL
+PXLD NULL NULL NULL
+QNCYBDW NULL NULL NULL
+UA NULL NULL NULL
+WXHJ NULL NULL NULL
+WXHJ NULL NULL NULL
+WXHJ NULL NULL NULL
+WXHJ NULL NULL NULL
+WXHJ NULL NULL NULL
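The plan above shows how Hive vectorizes this FULL OUTER MapJoin on Tez: the big table is probed in two passes that Union 2 stitches back together. Map 1 runs VectorMapJoinFullOuterStringOperator to emit the matched rows plus the non-matched big-table rows, while Reducer 4 re-probes the same Map 5 hash table with VectorMapJoinFullOuterIntersectStringOperator (fullOuterIntersect: true) to emit the non-matched small-table rows; fullOuterSmallTableKeyMapping projects the small-table key into the output, which is why s.key is populated on small-table-only rows. The result set above also confirms FULL OUTER NULL-key semantics: a NULL join key never matches, so each NULL-keyed row appears exactly once, NULL-extended on the other side.

A minimal sketch of a session that should report this plan shape; the settings are read off the nativeConditionsMet lines above, so treat the SETs as illustrative (defaults can differ by Hive version), not as required flags:

-- Hedged sketch: reproduce the two-pass vectorized FULL OUTER MapJoin plan
-- against the tables created earlier in this test.
SET hive.execution.engine=tez;
SET hive.vectorized.execution.enabled=true;
SET hive.vectorized.execution.mapjoin.native.enabled=true;
SET hive.mapjoin.optimized.hashtable=true;

EXPLAIN VECTORIZATION DETAIL
SELECT b.key, s.key, s.s_date, s.s_timestamp
FROM fullouter_string_big_1a b
FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key
ORDER BY b.key;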
+PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key
+order by b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fullouter_string_big_1a_nonull
+PREHOOK: Input: default@fullouter_string_small_1a
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key
+order by b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fullouter_string_big_1a_nonull
+POSTHOOK: Input: default@fullouter_string_small_1a
+#### A masked pattern was here ####
+FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556
+MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592
+NULL 1985-01-22 2111-01-10 15:44:28
+NULL 2021-02-21 2802-04-21 18:48:18.5933838
+NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13
+NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35
+NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799
+NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558
+NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692
+NULL BEP 2141-02-19 2521-06-09 01:20:07.121
+NULL BEP 2206-08-10 2331-10-09 10:59:51
+NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094
+NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82
+NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647
+NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048
+NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738
+NULL FYW 1807-03-20 2305-08-17 01:32:44
+NULL GOYJHW 1959-04-27 NULL
+NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302
+NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784
+NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251
+NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888
+NULL IWEZJHKE NULL NULL
+NULL KL 1980-09-22 2073-08-25 11:51:10.318
+NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454
+NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055
+NULL LOTLS 2126-09-16 1977-12-15 15:28:56
+NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922
+NULL NULL 1865-11-08 2893-04-07 07:36:12
+NULL NULL 1915-02-22 2554-10-27 09:34:30
+NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801
+NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266
+NULL SDA 2196-04-12 2462-10-26 19:28:12.733
+NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287
+NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887
+NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59
+NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498
+NULL ZNOUDCR NULL 1988-04-23 08:40:21
+PXLD NULL NULL NULL
+PXLD NULL NULL NULL
+PXLD NULL NULL NULL
+QNCYBDW NULL NULL NULL
+UA NULL NULL NULL
+WXHJ NULL NULL NULL
+WXHJ NULL NULL NULL
+WXHJ NULL NULL NULL
+WXHJ NULL NULL NULL
+WXHJ NULL NULL NULL
+PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key
+order by b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@fullouter_string_big_1a
+PREHOOK: Input: default@fullouter_string_small_1a_nonull
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key
+order by b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@fullouter_string_big_1a
+POSTHOOK: Input: default@fullouter_string_small_1a_nonull
+#### A masked pattern was here ####
+FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556
+MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592
+NULL 1985-01-22 2111-01-10 15:44:28
+NULL 2021-02-21 2802-04-21 18:48:18.5933838
+NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13
+NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35
+NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799
+NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558
+NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692
+NULL BEP 2141-02-19 2521-06-09 01:20:07.121
+NULL BEP 2206-08-10 2331-10-09 10:59:51
+NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094
+NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82
+NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647
+NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048
+NULL FROPIK 2214-02-09 1949-08-18 
17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 
1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:ROW__ID:struct] + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:s_date:date, 2:ROW__ID:struct] + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:bigint, s_date:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: bigint) + 1 KEY.reducesinkkey0 (type: bigint) + Map Join Vectorization: + bigTableKeyColumns: 0:bigint + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:bigint + className: VectorMapJoinFullOuterLongOperator + fullOuterSmallTableKeyMapping: 0 -> 1 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:bigint, 1:bigint, 2:date + smallTableValueMapping: 2:date + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 4 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:bigint, 2:date + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:bigint, VALUE._col0:bigint, VALUE._col1:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: 
KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 
7297177530102477725 2125-08-26
+NULL 7937120928560087303 2083-03-14
+NULL 8755921538765428593 1827-05-01
+NULL NULL 2024-01-23
+NULL NULL 2098-02-10
+NULL NULL 2242-02-08
+NULL NULL NULL
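For the bigint keys the optimizer picked a different shape: a dynamically partitioned hash join (DynamicPartitionHashJoin: true in the plan above). Both inputs are shuffled on the join key (CUSTOM_SIMPLE_EDGE), so the FULL OUTER MapJoin (VectorMapJoinFullOuterLongOperator) runs in Reducer 2 against a per-reducer hash table; since every key value lands in exactly one reducer, no second intersect pass and no Union vertex are needed. A hedged sketch follows; hive.optimize.dynamic.partition.hashjoin is an assumption (it is the setting that typically gates this reduce-side plan on Tez, but it does not appear in this output):

-- Hedged sketch: reduce-side (dynamically partitioned) FULL OUTER hash join.
SET hive.execution.engine=tez;
SET hive.optimize.dynamic.partition.hashjoin=true;  -- assumed switch; not shown in the plan above

EXPLAIN VECTORIZATION DETAIL
SELECT b.key, s.key, s.s_date
FROM fullouter_long_big_1a b
FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key
ORDER BY b.key;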
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key
+order by b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
+SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key
+order by b.key
+POSTHOOK: type: QUERY
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: b
+ Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:key:bigint, 1:ROW__ID:struct]
+ Select Operator
+ expressions: key (type: bigint)
+ outputColumnNames: _col0
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0]
+ Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ keyColumns: 0:bigint
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 1
+ includeColumns: [0]
+ dataColumns: key:bigint
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: s
+ Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:key:bigint, 1:s_date:date, 2:ROW__ID:struct]
+ Select Operator
+ expressions: key (type: bigint), s_date (type: date)
+ outputColumnNames: _col0, _col1
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1]
+ Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Map-reduce partition columns: _col0 (type: bigint)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ keyColumns: 0:bigint
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumns: 1:date
+ Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: date)
+ Execution mode: vectorized, llap
+ LLAP IO: all inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
+ inputFormatFeatureSupport: []
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 2
+ includeColumns: [0, 1]
+ dataColumns: key:bigint, s_date:date
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: a
+ reduceColumnSortOrder: +
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 1
+ dataColumns: KEY.reducesinkkey0:bigint
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, bigint]
+ Reduce Operator Tree:
+ Map Join Operator
+ condition map:
+ Full Outer Join 0 to 1
+ keys:
+ 0 KEY.reducesinkkey0 (type: bigint)
+ 1 KEY.reducesinkkey0 (type: bigint)
+ Map Join Vectorization:
+ bigTableKeyColumns: 0:bigint
+ bigTableRetainColumnNums: [0]
+ bigTableValueColumns: 0:bigint
+ className: VectorMapJoinFullOuterLongOperator
+ fullOuterSmallTableKeyMapping: 0 -> 1
+ native: true
+ nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true
+ projectedOutput: 0:bigint, 1:bigint, 2:date
+ smallTableValueMapping: 2:date
+ hashTableImplementationType: OPTIMIZED
+ outputColumnNames: _col0, _col1, _col2
+ input vertices:
+ 1 Map 4
+ Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE
+ DynamicPartitionHashJoin: true
+ Reduce Output Operator
+ key expressions: _col0 (type: bigint)
+ sort order: +
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkObjectHashOperator
+ keyColumns: 0:bigint
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumns: 1:bigint, 2:date
+ Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE
+ value expressions: _col1 (type: bigint), _col2 (type: date)
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Vectorization:
+ enabled: true
+ enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
+ reduceColumnNullOrder: a
+ reduceColumnSortOrder: +
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 3
+ dataColumns: KEY.reducesinkkey0:bigint, VALUE._col0:bigint, VALUE._col1:date
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
+ Reduce Operator Tree:
+ Select Operator
+ expressions: KEY.reducesinkkey0 (type: bigint), 
VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 
7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:ROW__ID:struct] + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:s_date:date, 2:ROW__ID:struct] + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No 
PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:bigint, s_date:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: bigint) + 1 KEY.reducesinkkey0 (type: bigint) + Map Join Vectorization: + bigTableKeyColumns: 0:bigint + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:bigint + className: VectorMapJoinFullOuterLongOperator + fullOuterSmallTableKeyMapping: 0 -> 1 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:bigint, 1:bigint, 2:date + smallTableValueMapping: 2:date + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 4 + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:bigint, 2:date + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:bigint, VALUE._col0:bigint, VALUE._col1:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: 
bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 
2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:ROW__ID:struct] + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:s_date:date, 2:ROW__ID:struct] + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:bigint, s_date:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: bigint) + 1 KEY.reducesinkkey0 (type: bigint) + Map Join Vectorization: + bigTableKeyColumns: 0:bigint + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:bigint + className: VectorMapJoinFullOuterLongOperator + fullOuterSmallTableKeyMapping: 0 -> 1 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:bigint, 1:bigint, 2:date + smallTableValueMapping: 2:date + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 4 + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:bigint, 2:date + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:bigint, VALUE._col0:bigint, VALUE._col1:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + 
outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 
7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:smallint, 1:ROW__ID:struct] + Select Operator + expressions: key (type: smallint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:smallint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:smallint, 1:s_timestamp:timestamp, 2:ROW__ID:struct] + Select Operator + expressions: key (type: smallint), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN 
[tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:timestamp + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:smallint, s_timestamp:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:smallint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, timestamp] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: smallint) + 1 KEY.reducesinkkey0 (type: smallint) + Map Join Vectorization: + bigTableKeyColumns: 0:smallint + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:smallint + className: VectorMapJoinFullOuterLongOperator + fullOuterSmallTableKeyMapping: 0 -> 1 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:smallint, 1:smallint, 2:timestamp + smallTableValueMapping: 2:timestamp + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 4 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:smallint, 2:timestamp + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: smallint), _col2 (type: timestamp) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:smallint, VALUE._col0:smallint, 
VALUE._col1:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: smallint), VALUE._col1 (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +-25394 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +32030 32030 2101-09-09 07:35:05.145 +NULL -14172 1918-09-13 11:44:24.496926711 +NULL -14172 2355-01-14 23:23:34 +NULL -14172 2809-06-07 02:10:58 +NULL -15361 2219-09-15 20:15:03.000169887 +NULL -15361 2434-08-13 20:37:07.000172979 +NULL -15427 2023-11-09 19:31:21 +NULL -15427 2046-06-07 22:58:40.728 +NULL -15427 2355-01-08 12:34:11.617 +NULL -19167 2230-12-22 20:25:39.000242111 +NULL -19167 2319-08-26 11:07:11.268 +NULL -20517 2233-12-20 04:06:56.666522799 +NULL -20517 2774-06-23 12:04:06.5 +NULL -20824 2478-11-05 00:28:05 +NULL -22422 1949-03-13 00:07:53.075 +NULL -22422 2337-07-19 06:33:02.000353352 +NULL -22422 2982-12-28 06:30:26.000883228 +NULL -23117 2037-01-05 21:52:30.685952759 +NULL -24775 2035-03-26 08:11:23.375224153 +NULL -24775 2920-08-06 15:58:28.261059449 +NULL -26998 2268-08-04 12:48:11.848006292 +NULL -26998 2428-12-26 07:53:45.96925825 +NULL -26998 2926-07-18 09:02:46.077 +NULL -29600 2333-11-02 15:06:30 +NULL -30059 2269-05-04 21:23:44.000339209 +NULL -30059 2420-12-10 22:12:30 +NULL -30059 2713-10-13 09:28:49 +NULL -30306 2619-05-24 10:35:58.000774018 +NULL -4279 2214-09-10 03:53:06 +NULL -4279 2470-08-12 11:21:14.000955747 +NULL -7373 2662-10-28 12:07:02.000526564 +NULL -7624 2219-12-03 17:07:19 +NULL -7624 2289-08-28 00:14:34 +NULL -7624 2623-03-20 03:18:45.00006465 +NULL -8087 2550-06-26 23:57:42.588007617 +NULL -8087 2923-07-02 11:40:26.115 +NULL -8435 2642-02-07 11:45:04.353231638 +NULL -8435 2834-12-06 16:38:18.901 +NULL -8624 2120-02-15 15:36:40.000758423 +NULL -8624 2282-03-28 07:58:16 +NULL -8624 2644-05-04 04:45:07.839 +NULL 10553 2168-05-05 21:10:59.000152113 +NULL 11232 2038-04-06 14:53:59 +NULL 11232 2507-01-27 22:04:22.49661421 +NULL 11232 2533-11-26 12:22:18 +NULL 13598 2421-05-20 14:18:31.000264698 +NULL 13598 
2909-06-25 23:22:50 +NULL 14865 2079-10-06 16:54:35.117 +NULL 14865 2220-02-28 03:41:36 +NULL 14865 2943-03-21 00:42:10.505 +NULL 17125 2236-07-14 01:54:40.927230276 +NULL 17125 2629-11-15 15:34:52 +NULL 21181 2253-03-12 11:55:48.332 +NULL 21181 2434-02-20 00:46:29.633 +NULL 21436 2526-09-22 23:44:55 +NULL 21436 2696-05-08 05:19:24.112 +NULL 24870 2752-12-26 12:32:23.03685163 +NULL 2632 2561-12-15 15:42:27 +NULL 26484 1919-03-04 07:32:37.519 +NULL 26484 2953-03-10 02:05:26.508953676 +NULL 2748 2298-06-20 21:01:24 +NULL 2748 2759-02-13 18:04:36.000307355 +NULL 2748 2862-04-20 13:12:39.482805897 +NULL 29407 2385-12-14 06:03:39.597 +NULL 3198 2223-04-14 13:20:49 +NULL 3198 2428-06-13 16:21:33.955 +NULL 3198 2736-12-20 03:59:50.343550301 +NULL 4510 2293-01-17 13:47:41.00001006 +NULL 4510 2777-03-24 03:44:28.000169723 +NULL NULL 2124-05-07 15:01:19.021 +NULL NULL 2933-06-20 11:48:09.000839488 +NULL NULL 2971-08-07 12:02:11.000948152 +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:b_string:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: int), b_string (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: 
key:int, b_string:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:s_decimal:decimal(38,18), 2:ROW__ID:struct] + Select Operator + expressions: key (type: int), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:decimal(38,18) + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(38,18)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, s_decimal:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, decimal(38,18)] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: int) + 1 KEY.reducesinkkey0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:string + className: VectorMapJoinFullOuterLongOperator + fullOuterSmallTableKeyMapping: 0 -> 2 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:int, 1:string, 2:int, 3:decimal(38,18) + smallTableValueMapping: 3:decimal(38,18) + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 
(type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string, 2:int, 3:decimal(38,18) + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: decimal(38,18)) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:string, VALUE._col1:int, VALUE._col2:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +-1437463633 JU NULL NULL +-1437463633 NULL NULL NULL +-1437463633 SOWDWMS NULL NULL +-1437463633 TKTKGVGFW NULL NULL +-1437463633 YYXPPCH NULL NULL +1725068083 MKSCCE NULL NULL +1928928239 AMKTIWQ NULL NULL +1928928239 NULL NULL NULL +1928928239 NULL NULL NULL +1928928239 VAQHVRI NULL NULL +NULL ABBZ NULL NULL +NULL NULL -1093006502 -69.556658280000000000 +NULL NULL -1197550983 -0.558879692200000000 +NULL NULL -1197550983 0.100000000000000000 +NULL NULL -1197550983 71852.833867441261300000 +NULL NULL -1250662632 -544.554649000000000000 +NULL NULL -1250662632 5454127198.951479000000000000 +NULL NULL -1250662632 93104.000000000000000000 +NULL NULL -1264372462 -6993985240226.000000000000000000 +NULL NULL -1264372462 -899.000000000000000000 +NULL NULL -1264372462 0.883000000000000000 +NULL NULL -1490239076 92253.232096000000000000 
+NULL NULL -1681455031 -11105.372477000000000000 +NULL NULL -1681455031 -6.454300000000000000 +NULL NULL -1740848088 -9.157000000000000000 +NULL NULL -1740848088 0.506394259000000000 +NULL NULL -1740848088 901.441000000000000000 +NULL NULL -2048404259 -0.322296044625100000 +NULL NULL -2048404259 3939387044.100000000000000000 +NULL NULL -2123273881 -55.891980000000000000 +NULL NULL -2123273881 3.959000000000000000 +NULL NULL -243940373 -583.258000000000000000 +NULL NULL -243940373 -97176129669.654953000000000000 +NULL NULL -369457052 560.119078830904550000 +NULL NULL -369457052 7.700000000000000000 +NULL NULL -424713789 0.480000000000000000 +NULL NULL -466171792 0.000000000000000000 +NULL NULL -466171792 4227.534400000000000000 +NULL NULL -466171792 69.900000000000000000 +NULL NULL -477147437 6.000000000000000000 +NULL NULL -793950320 -0.100000000000000000 +NULL NULL -793950320 -16.000000000000000000 +NULL NULL -934092157 -7843850349.571300380000000000 +NULL NULL -99948814 -38076694.398100000000000000 +NULL NULL -99948814 -96386.438000000000000000 +NULL NULL 1039864870 0.700000000000000000 +NULL NULL 1039864870 94.040000000000000000 +NULL NULL 1039864870 987601.570000000000000000 +NULL NULL 1091836730 -5017.140000000000000000 +NULL NULL 1091836730 0.020000000000000000 +NULL NULL 1242586043 -4.000000000000000000 +NULL NULL 1242586043 -749975924224.630000000000000000 +NULL NULL 1242586043 71.148500000000000000 +NULL NULL 1479580778 92077343080.700000000000000000 +NULL NULL 150678276 -8278.000000000000000000 +NULL NULL 150678276 15989394.843600000000000000 +NULL NULL 1519948464 152.000000000000000000 +NULL NULL 1561921421 -5.405000000000000000 +NULL NULL 1561921421 53050.550000000000000000 +NULL NULL 1585021913 -5762331.066971120000000000 +NULL NULL 1585021913 607.227470000000000000 +NULL NULL 1585021913 745222.668089540000000000 +NULL NULL 1719049112 -7888197.000000000000000000 +NULL NULL 1738753776 -99817635066320.241600000000000000 +NULL NULL 1738753776 1525.280459649262000000 +NULL NULL 1755897735 -39.965207000000000000 +NULL NULL 1785750809 47443.115000000000000000 +NULL NULL 1801735854 -1760956929364.267000000000000000 +NULL NULL 1801735854 -438541294.700000000000000000 +NULL NULL 1816559437 -1035.700900000000000000 +NULL NULL 1909136587 -8610.078036935181000000 +NULL NULL 1909136587 181.076815359440000000 +NULL NULL 193709887 -0.566300000000000000 +NULL NULL 193709887 -19889.830000000000000000 +NULL NULL 193709887 0.800000000000000000 +NULL NULL 284554389 5.727146000000000000 +NULL NULL 294598722 -3542.600000000000000000 +NULL NULL 294598722 -9377326244.444000000000000000 +NULL NULL 448130683 -4302.485366846491000000 +NULL NULL 452719211 3020.293893074463600000 +NULL NULL 452719211 83003.437220000000000000 +NULL NULL 466567142 -58810.605860000000000000 +NULL NULL 466567142 -9763217822.129028000000000000 +NULL NULL 466567142 196.578529539858400000 +NULL NULL 560745412 678.250000000000000000 +NULL NULL 698032489 -330457.429262583900000000 +NULL NULL 891262439 -0.040000000000000000 +NULL NULL 90660785 -4564.517185000000000000 +NULL NULL 90660785 12590.288613000000000000 +NULL NULL NULL 1.089120893565337000 +NULL NULL NULL 4.261652270000000000 +NULL NULL NULL 682070836.264960300000000000 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN 
fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:ROW__ID:struct] + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int + className: VectorMapJoinFullOuterLongOperator + fullOuterSmallTableKeyMapping: 0 -> 2 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:int, 2:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 5 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:int + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:int + 
partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:ROW__ID:struct] + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce 
Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int + className: VectorMapJoinFullOuterIntersectLongOperator + fullOuterSmallTableKeyMapping: 0 -> 1 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:int, 1:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 5 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + fullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:int + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +-1780951928 NULL +-2038654700 -2038654700 +-670834064 NULL +-702028721 NULL +-702028721 NULL +-702028721 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +NULL -1003639073 +NULL -1014271154 +NULL -1036083124 +NULL -1210744742 +NULL -1323620496 +NULL -1379355738 +NULL -1712018127 +NULL -1792852276 +NULL -1912571616 +NULL -497171161 +NULL -683339273 +NULL -707688773 +NULL -747044796 +NULL -894799664 +NULL -932176731 +NULL 103640700 +NULL 1164387380 +NULL 1372592319 +NULL 1431997749 +NULL 1614287784 +NULL 162858059 +NULL 1635405412 +NULL 1685473722 +NULL 1780951928 +NULL 1825107160 +NULL 1831520491 +NULL 1840266070 +NULL 1997943409 +NULL 2119085509 +NULL 246169862 +NULL 
260588085 +NULL 41376947 +NULL 436878811 +NULL 533298451 +NULL 670834064 +NULL 699007128 +NULL 699863556 +NULL NULL +NULL NULL +NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:s_date:date, 2:s_timestamp:timestamp, 3:ROW__ID:struct] + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date, 2:timestamp + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:string, s_date:date, s_timestamp:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:string + partitionColumnCount: 0 + scratchColumnTypeNames: [string, bigint, timestamp] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: string) + 1 KEY.reducesinkkey0 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:string + className: VectorMapJoinFullOuterStringOperator + fullOuterSmallTableKeyMapping: 0 -> 1 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:string, 1:string, 2:date, 3:timestamp + smallTableValueMapping: 2:date, 3:timestamp + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string, 2:date, 3:timestamp + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + 
dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, VALUE._col0:string, VALUE._col1:date, VALUE._col2:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 
2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:s_date:date, 2:s_timestamp:timestamp, 3:ROW__ID:struct] + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + 
className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date, 2:timestamp + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:string, s_date:date, s_timestamp:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:string + partitionColumnCount: 0 + scratchColumnTypeNames: [string, bigint, timestamp] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: string) + 1 KEY.reducesinkkey0 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:string + className: VectorMapJoinFullOuterStringOperator + fullOuterSmallTableKeyMapping: 0 -> 1 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:string, 1:string, 2:date, 3:timestamp + smallTableValueMapping: 2:date, 3:timestamp + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string, 2:date, 3:timestamp + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + 
reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, VALUE._col0:string, VALUE._col1:date, VALUE._col2:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL 
VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:s_date:date, 2:s_timestamp:timestamp, 3:ROW__ID:struct] + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: 
_col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date, 2:timestamp + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:string, s_date:date, s_timestamp:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:string + partitionColumnCount: 0 + scratchColumnTypeNames: [string, bigint, timestamp] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: string) + 1 KEY.reducesinkkey0 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:string + className: VectorMapJoinFullOuterStringOperator + fullOuterSmallTableKeyMapping: 0 -> 1 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:string, 1:string, 2:date, 3:timestamp + smallTableValueMapping: 2:date, 3:timestamp + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string, 2:date, 3:timestamp + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: 
hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, VALUE._col0:string, VALUE._col1:date, VALUE._col2:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 
16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:s_date:date, 2:s_timestamp:timestamp, 3:ROW__ID:struct] + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + Reduce 
Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date, 2:timestamp + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:string, s_date:date, s_timestamp:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:string + partitionColumnCount: 0 + scratchColumnTypeNames: [string, bigint, timestamp] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: string) + 1 KEY.reducesinkkey0 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:string + className: VectorMapJoinFullOuterStringOperator + fullOuterSmallTableKeyMapping: 0 -> 1 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:string, 1:string, 2:date, 3:timestamp + smallTableValueMapping: 2:date, 3:timestamp + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string, 2:date, 3:timestamp + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + 
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, VALUE._col0:string, VALUE._col1:date, VALUE._col2:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 
1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct] + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key0:smallint, key1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct] + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + 
Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key0:smallint, key1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + 1 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:smallint, 1:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:smallint, 1:int + className: VectorMapJoinFullOuterMultiKeyOperator + fullOuterSmallTableKeyMapping: 0 -> 2, 1 -> 3 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:smallint, 1:int, 2:smallint, 3:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:smallint, 3:int + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reducer 3 + 
Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int, VALUE._col0:smallint, VALUE._col1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL 
-980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +NULL NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct] + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS 
true + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key0:smallint, key1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct] + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key0:smallint, key1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + 1 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:smallint, 1:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:smallint, 1:int + className: VectorMapJoinFullOuterMultiKeyOperator + fullOuterSmallTableKeyMapping: 0 -> 2, 1 -> 3 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, 
Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:smallint, 1:int, 2:smallint, 3:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:smallint, 3:int + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int, VALUE._col0:smallint, VALUE._col1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 
-1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: 
Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct] + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key0:smallint, key1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct] + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key0:smallint, key1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, 
spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + 1 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:smallint, 1:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:smallint, 1:int + className: VectorMapJoinFullOuterMultiKeyOperator + fullOuterSmallTableKeyMapping: 0 -> 2, 1 -> 3 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:smallint, 1:int, 2:smallint, 3:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:smallint, 3:int + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int, VALUE._col0:smallint, VALUE._col1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + 
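The plan above is the substantive part of this golden file: the FULL OUTER JOIN runs as a reduce-side Map Join Operator whose vectorization detail reports className VectorMapJoinFullOuterMultiKeyOperator, hashTableImplementationType OPTIMIZED, DynamicPartitionHashJoin, and a fullOuterSmallTableKeyMapping that projects the big-table key columns into the small-table key output columns for matched rows. The native operator is selected only while every nativeConditionsMet entry printed in the plan holds. The following is a minimal sketch (not part of the generated q.out) of a qtest-style snippet that reproduces this plan shape; the SET lines simply mirror the condition names printed in the plan itself, and the query is the one under test:

-- Sketch only: these settings correspond to the nativeConditionsMet /
-- enabledConditionsMet entries shown in the plan above; whether each is
-- already the default depends on the test driver configuration.
set hive.vectorized.execution.enabled=true;
set hive.vectorized.execution.mapjoin.native.enabled=true;
set hive.mapjoin.optimized.hashtable=true;

EXPLAIN VECTORIZATION DETAIL
SELECT b.key0, b.key1, s.key0, s.key1
FROM fullouter_multikey_big_1a b
FULL OUTER JOIN fullouter_multikey_small_1a_nonull s
  ON b.key0 = s.key0 AND b.key1 = s.key1
ORDER BY b.key0, b.key1;

Flipping hive.vectorized.execution.mapjoin.native.enabled to false exercises the pass-through path instead: the plan then reports the row-mode VectorMapJoinOperator with native: false, as the later vector_fullouter_mapjoin_1_optimized_passthru.q.out output in this patch shows.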
+PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 
8177 -1995259010 +NULL NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct] + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key0:smallint, key1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct] + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:smallint, 1:int + 
native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key0:smallint, key1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + 1 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:smallint, 1:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:smallint, 1:int + className: VectorMapJoinFullOuterMultiKeyOperator + fullOuterSmallTableKeyMapping: 0 -> 2, 1 -> 3 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:smallint, 1:int, 2:smallint, 3:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:smallint, 3:int + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: 
KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int, VALUE._col0:smallint, VALUE._col1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 
+NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:timestamp, 1:key1:smallint, 2:key2:string, 3:ROW__ID:struct] + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:timestamp, 1:smallint, 2:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: 
hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key0:timestamp, key1:smallint, key2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:timestamp, 1:key1:smallint, 2:key2:string, 3:s_decimal:decimal(38,18), 4:ROW__ID:struct] + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:timestamp, 1:smallint, 2:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 3:decimal(38,18) + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(38,18)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + includeColumns: [0, 1, 2, 3] + dataColumns: key0:timestamp, key1:smallint, key2:string, s_decimal:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:timestamp, KEY.reducesinkkey1:smallint, KEY.reducesinkkey2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [timestamp, bigint, string, decimal(38,18)] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: string) + 1 KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: string) + Map Join Vectorization: + bigTableKeyColumns: 0:timestamp, 1:smallint, 2:string + bigTableRetainColumnNums: [0, 1, 2] + bigTableValueColumns: 0:timestamp, 1:smallint, 2:string 
+ className: VectorMapJoinFullOuterMultiKeyOperator + fullOuterSmallTableKeyMapping: 0 -> 3, 1 -> 4, 2 -> 5 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:timestamp, 1:smallint, 2:string, 3:timestamp, 4:smallint, 5:string, 6:decimal(38,18) + smallTableValueMapping: 6:decimal(38,18) + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + input vertices: + 1 Map 4 + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:timestamp, 1:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:string, 3:timestamp, 4:smallint, 5:string, 6:decimal(38,18) + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: timestamp), _col4 (type: smallint), _col5 (type: string), _col6 (type: decimal(38,18)) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 7 + dataColumns: KEY.reducesinkkey0:timestamp, KEY.reducesinkkey1:smallint, VALUE._col0:string, VALUE._col1:timestamp, VALUE._col2:smallint, VALUE._col3:string, VALUE._col4:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), VALUE._col0 (type: string), VALUE._col1 (type: timestamp), VALUE._col2 (type: smallint), VALUE._col3 (type: string), VALUE._col4 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = 
s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b +PREHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b +POSTHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 21635 ANCO NULL NULL NULL NULL +2082-07-14 04:00:40.695380469 12556 NCYBDW NULL NULL NULL NULL +2093-04-10 23:36:54.846 1446 GHZVPWFO NULL NULL NULL NULL +2093-04-10 23:36:54.846 28996 Q NULL NULL NULL NULL +2093-04-10 23:36:54.846 NULL NULL NULL NULL NULL NULL +2188-06-04 15:03:14.963259704 9468 AAA 2188-06-04 15:03:14.963259704 9468 AAA 2.754963520000000000 +2299-11-15 16:41:30.401 -31077 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 -6909 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 1446 NULL NULL NULL NULL NULL +2608-02-23 23:44:02.546440891 26184 NCYBDW NULL NULL NULL NULL +2686-05-23 07:46:46.565832918 13212 NCYBDW 2686-05-23 07:46:46.565832918 13212 NCYBDW -917116793.400000000000000000 +2686-05-23 07:46:46.565832918 NULL GHZVPWFO NULL NULL NULL NULL +2898-10-01 22:27:02.000871113 10361 NCYBDW NULL NULL NULL NULL +NULL -6909 NULL NULL NULL NULL NULL +NULL 21635 ANCO NULL NULL NULL NULL +NULL NULL CCWYD NULL NULL NULL NULL +NULL NULL NULL 1905-04-20 13:42:24.000469776 2638 KAUUFF 7.000000000000000000 +NULL NULL NULL 1919-06-20 00:16:50.611028595 20223 ZKBC -23.000000000000000000 +NULL NULL NULL 1931-12-04 11:13:47.269597392 23196 HVJCQMTQL -9697532.899400000000000000 +NULL NULL NULL 1941-10-16 02:19:35.000423663 -24459 AO -821445414.457971200000000000 +NULL NULL NULL 1957-02-01 14:00:28.000548421 -16085 ZVEUKC -2312.814900000000000000 +NULL NULL NULL 1957-03-06 09:57:31 -26373 NXLNNSO 2.000000000000000000 +NULL NULL NULL 1980-09-13 19:57:15 NULL M 57650.772300000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 7506645.953700000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 98790.713907420831000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -22419 LOTLS 342.372604022858400000 +NULL NULL NULL 2038-10-12 09:15:33.000539653 -19598 YKNIAJW -642807895924.660000000000000000 +NULL NULL NULL 2044-05-02 07:00:03.35 -8751 ZSMB -453797242.029791752000000000 +NULL NULL NULL 2071-07-21 20:02:32.000250697 2638 NRUV -66198.351092000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 5.000000000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 726945733.419300000000000000 +NULL NULL NULL 2075-10-25 20:32:40.000792874 NULL NULL 226612651968.360760000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR -394.086700000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR 67892053.023760940000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV -85184687349898.892000000000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 0.439686100000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 482.538341135921900000 +NULL NULL NULL 2105-01-04 16:27:45 23100 ZSMB -83.232800000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 NULL -9784.820000000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 UANGISEXR -5996.306000000000000000 +NULL NULL NULL 2169-04-02 
06:30:32 23855 PDVQATOS -1515597428.000000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -4016.960800000000000000 +NULL NULL NULL 2201-07-05 17:22:06.084206844 -24459 UBGT 1.506948328200000000 +NULL NULL NULL 2238-05-17 19:27:25.519 20223 KQCM -0.010950000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ -0.261490000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ 37.728800000000000000 +NULL NULL NULL 2266-09-26 06:27:29.000284762 20223 EDYJJN 14.000000000000000000 +NULL NULL NULL 2301-06-03 17:16:19 15332 ZVEUKC 0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 -13125 JFYW 6.086657000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR 1279917802.420000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 12587 OPW -4.594895040000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T -0.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 2720.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 61.302000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G -4319470286240016.300000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G 975.000000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 30285 GSJPSIYOU 0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO -0.435500000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO 0.713517473350000000 +NULL NULL NULL 2309-01-15 12:43:49 22821 ZMY 40.900000000000000000 +NULL NULL NULL 2332-06-14 07:02:42.32 -26373 XFFFDTQ 56845106806308.900000000000000000 +NULL NULL NULL 2333-07-28 09:59:26 23196 RKSK 37872288434740893.500000000000000000 +NULL NULL NULL 2338-02-12 09:30:07 20223 CTH -6154.763054000000000000 +NULL NULL NULL 2340-12-15 05:15:17.133588982 23663 HHTP 33383.800000000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 74179461.880493000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 92.150000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -32.460000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -472.000000000000000000 +NULL NULL NULL 2391-01-17 15:28:37.00045143 16160 ZVEUKC 771355639420297.133000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB -5151598.347000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB 0.767183260000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -162.950000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -9926693851.000000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR 0.400000000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR -769088.176482000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR 93262.914526611000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -9575827.553960000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -991.436050000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB 8694.890000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB -582687.000000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB 67.417990000000000000 +NULL NULL NULL 2467-05-11 06:04:13.426693647 23196 EIBSDASR -8.554888380100000000 +NULL NULL NULL 2480-10-02 09:31:37.000770961 -26373 NBN -5875.519725200000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ -49.512190000000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ 0.445800000000000000 +NULL NULL NULL 
2512-10-06 03:03:03 13195 CRJ 14.000000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X -922.695158410700000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X 761196.522000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 24313 QBHUG -8423.151573236000000000 +NULL NULL NULL 2512-10-06 03:03:03 32099 ARNZ -0.410000000000000000 +NULL NULL NULL 2525-05-12 15:59:35 -24459 SAVRGA 53106747151.863300000000000000 +NULL NULL NULL 2535-03-01 05:04:49.000525883 23663 ALIQKNXHE -0.166569100000000000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 41.774515077866460000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 6.801285170800000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -12923 PPTJPFR 5.400000000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -17786 HYEGQ -84.169614329419000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 7362887891522.378200000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 749563668434009.650000000000000000 +NULL NULL NULL 2668-06-25 07:12:37.000970744 2638 TJE -2.779682700000000000 +NULL NULL NULL 2688-02-06 20:58:42.000947837 20223 PAIY 67661.735000000000000000 +NULL NULL NULL 2743-12-27 05:16:19.000573579 -12914 ZVEUKC -811984611.517849700000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 368.000000000000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 60.602579700000000000 +NULL NULL NULL 2808-07-09 02:10:11.928498854 -19598 FHFX 0.300000000000000000 +NULL NULL NULL 2829-06-04 08:01:47.836 22771 ZVEUKC 94317.753180000000000000 +NULL NULL NULL 2861-05-27 07:13:01.000848622 -19598 WKPXNLXS 29399.000000000000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -4244.926206619000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -9951044.000000000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC -56082455.033918000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC 57.621752577880370000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 51.732330327300000000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 6370.000000000000000000 +NULL NULL NULL 2898-12-18 03:37:17 -24459 MHNBXPBM 14.236693562384810000 +NULL NULL NULL 2913-07-17 15:06:58.041 -10206 NULL -0.200000000000000000 +NULL NULL NULL 2938-12-21 23:35:59.498 29362 ZMY 0.880000000000000000 +NULL NULL NULL 2957-05-07 10:41:46 20223 OWQT -586953.153681000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF -96.300000000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF 2.157765900000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -18138 VDPN 8924831210.427680190000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -32485 AGEPWWLJF -48431309405.652522000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -375994644577.315257000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -81.000000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ 9.178000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 14500 WXLTRFQP -23.819800000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 6689 TFGVOGPJF -0.010000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -27394351.300000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -5.751278023000000000 +NULL NULL NULL NULL -12914 ZVEUKC 221.000000000000000000 +NULL NULL NULL NULL NULL NULL -2.400000000000000000 +NULL NULL NULL NULL NULL NULL -2207.300000000000000000 +NULL NULL NULL NULL NULL NULL NULL diff --git 
ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized_passthru.q.out ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized_passthru.q.out new file mode 100644 index 0000000..55c2643 --- /dev/null +++ ql/src/test/results/clientpositive/llap/vector_fullouter_mapjoin_1_optimized_passthru.q.out @@ -0,0 +1,8874 @@ +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: Lineage: fullouter_long_big_1a.key SIMPLE [(fullouter_long_big_1a_txt)fullouter_long_big_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull_txt(key bigint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_big_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_long_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: 
default@fullouter_long_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: Lineage: fullouter_long_big_1a_nonull.key SIMPLE [(fullouter_long_big_1a_nonull_txt)fullouter_long_big_1a_nonull_txt.FieldSchema(name:key, type:bigint, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1a_txt +PREHOOK: query: CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_small_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_small_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: Lineage: fullouter_long_small_1a.key SIMPLE [(fullouter_long_small_1a_txt)fullouter_long_small_1a_txt.FieldSchema(name:key, type:bigint, comment:null), ] +POSTHOOK: Lineage: fullouter_long_small_1a.s_date SIMPLE [(fullouter_long_small_1a_txt)fullouter_long_small_1a_txt.FieldSchema(name:s_date, type:date, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull_txt(key bigint, s_date date) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_long_small_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_long_small_1a_nonull_txt +PREHOOK: Output: database:default 
+PREHOOK: Output: default@fullouter_long_small_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_long_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_long_small_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_long_small_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1a_nonull +POSTHOOK: Lineage: fullouter_long_small_1a_nonull.key SIMPLE [(fullouter_long_small_1a_nonull_txt)fullouter_long_small_1a_nonull_txt.FieldSchema(name:key, type:bigint, comment:null), ] +POSTHOOK: Lineage: fullouter_long_small_1a_nonull.s_date SIMPLE [(fullouter_long_small_1a_nonull_txt)fullouter_long_small_1a_nonull_txt.FieldSchema(name:s_date, type:date, comment:null), ] +PREHOOK: query: analyze table fullouter_long_big_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Output: default@fullouter_long_big_1a +POSTHOOK: query: analyze table fullouter_long_big_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Output: default@fullouter_long_big_1a +PREHOOK: query: analyze table fullouter_long_big_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Output: default@fullouter_long_big_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Output: default@fullouter_long_big_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Output: default@fullouter_long_big_1a_nonull +POSTHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +PREHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Output: default@fullouter_long_big_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Output: default@fullouter_long_big_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a +PREHOOK: Output: default@fullouter_long_small_1a +POSTHOOK: query: analyze table fullouter_long_small_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a +POSTHOOK: Output: default@fullouter_long_small_1a +PREHOOK: query: analyze table fullouter_long_small_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a +PREHOOK: Output: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a +POSTHOOK: Output: default@fullouter_long_small_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics +PREHOOK: type: QUERY 
+PREHOOK: Input: default@fullouter_long_small_1a_nonull +PREHOOK: Output: default@fullouter_long_small_1a_nonull +POSTHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +POSTHOOK: Output: default@fullouter_long_small_1a_nonull +PREHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1a_nonull +PREHOOK: Output: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +POSTHOOK: Output: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:ROW__ID:struct] + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:s_date:date, 2:ROW__ID:struct] + Select 
Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:bigint, s_date:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:bigint, VALUE._col0:bigint, VALUE._col1:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM 
fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + 
Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + Map Join Vectorization: + bigTableKeyExpressions: col 0:bigint + bigTableValueExpressions: col 0:bigint + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:bigint, 2:date + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:s_date:date, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + 
projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:bigint, s_date:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:bigint, VALUE._col0:bigint, VALUE._col1:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + 
dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: bigint) + 1 _col0 (type: bigint) + Map Join Vectorization: + bigTableKeyExpressions: col 0:bigint + bigTableValueExpressions: col 0:bigint + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + fullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:bigint, 2:date + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 
2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 
+NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL NULL +PREHOOK: query: 
SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +PREHOOK: query: CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1b +POSTHOOK: query: CREATE TABLE fullouter_long_big_1b(key smallint) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1b +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: 
default@fullouter_long_big_1b +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1b.txt' OVERWRITE INTO TABLE fullouter_long_big_1b +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1b +PREHOOK: query: CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1b +POSTHOOK: query: CREATE TABLE fullouter_long_small_1b(key smallint, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1b +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1b +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1b.txt' OVERWRITE INTO TABLE fullouter_long_small_1b +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1b +PREHOOK: query: analyze table fullouter_long_big_1b compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Output: default@fullouter_long_big_1b +POSTHOOK: query: analyze table fullouter_long_big_1b compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Output: default@fullouter_long_big_1b +PREHOOK: query: analyze table fullouter_long_big_1b compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Output: default@fullouter_long_big_1b +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1b compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Output: default@fullouter_long_big_1b +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1b compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1b +PREHOOK: Output: default@fullouter_long_small_1b +POSTHOOK: query: analyze table fullouter_long_small_1b compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1b +POSTHOOK: Output: default@fullouter_long_small_1b +PREHOOK: query: analyze table fullouter_long_small_1b compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1b +PREHOOK: Output: default@fullouter_long_small_1b +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1b compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1b +POSTHOOK: Output: default@fullouter_long_small_1b +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + 
Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:smallint, 1:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: key (type: smallint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint) + 1 _col0 (type: smallint) + Map Join Vectorization: + bigTableKeyExpressions: col 0:smallint + bigTableValueExpressions: col 0:smallint + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:smallint, 2:timestamp + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: smallint), _col2 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:smallint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, timestamp] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + 
native: true + vectorizationSchemaColumns: [0:key:smallint, 1:s_timestamp:timestamp, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: key (type: smallint), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:timestamp + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:timestamp + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:smallint, s_timestamp:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:smallint, VALUE._col0:smallint, VALUE._col1:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: smallint), VALUE._col1 (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:smallint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, timestamp] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint) + 1 _col0 (type: smallint) + Map Join Vectorization: + bigTableKeyExpressions: col 0:smallint + bigTableValueExpressions: col 0:smallint + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 5 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + fullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:smallint, 2:timestamp + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: smallint), _col2 (type: timestamp) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +-25394 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +32030 32030 2101-09-09 07:35:05.145 +NULL -14172 1918-09-13 11:44:24.496926711 +NULL -14172 2355-01-14 23:23:34 +NULL -14172 2809-06-07 02:10:58 +NULL -15361 2219-09-15 20:15:03.000169887 +NULL -15361 2434-08-13 20:37:07.000172979 +NULL -15427 2023-11-09 19:31:21 +NULL -15427 2046-06-07 22:58:40.728 +NULL -15427 2355-01-08 
12:34:11.617 +NULL -19167 2230-12-22 20:25:39.000242111 +NULL -19167 2319-08-26 11:07:11.268 +NULL -20517 2233-12-20 04:06:56.666522799 +NULL -20517 2774-06-23 12:04:06.5 +NULL -20824 2478-11-05 00:28:05 +NULL -22422 1949-03-13 00:07:53.075 +NULL -22422 2337-07-19 06:33:02.000353352 +NULL -22422 2982-12-28 06:30:26.000883228 +NULL -23117 2037-01-05 21:52:30.685952759 +NULL -24775 2035-03-26 08:11:23.375224153 +NULL -24775 2920-08-06 15:58:28.261059449 +NULL -26998 2268-08-04 12:48:11.848006292 +NULL -26998 2428-12-26 07:53:45.96925825 +NULL -26998 2926-07-18 09:02:46.077 +NULL -29600 2333-11-02 15:06:30 +NULL -30059 2269-05-04 21:23:44.000339209 +NULL -30059 2420-12-10 22:12:30 +NULL -30059 2713-10-13 09:28:49 +NULL -30306 2619-05-24 10:35:58.000774018 +NULL -4279 2214-09-10 03:53:06 +NULL -4279 2470-08-12 11:21:14.000955747 +NULL -7373 2662-10-28 12:07:02.000526564 +NULL -7624 2219-12-03 17:07:19 +NULL -7624 2289-08-28 00:14:34 +NULL -7624 2623-03-20 03:18:45.00006465 +NULL -8087 2550-06-26 23:57:42.588007617 +NULL -8087 2923-07-02 11:40:26.115 +NULL -8435 2642-02-07 11:45:04.353231638 +NULL -8435 2834-12-06 16:38:18.901 +NULL -8624 2120-02-15 15:36:40.000758423 +NULL -8624 2282-03-28 07:58:16 +NULL -8624 2644-05-04 04:45:07.839 +NULL 10553 2168-05-05 21:10:59.000152113 +NULL 11232 2038-04-06 14:53:59 +NULL 11232 2507-01-27 22:04:22.49661421 +NULL 11232 2533-11-26 12:22:18 +NULL 13598 2421-05-20 14:18:31.000264698 +NULL 13598 2909-06-25 23:22:50 +NULL 14865 2079-10-06 16:54:35.117 +NULL 14865 2220-02-28 03:41:36 +NULL 14865 2943-03-21 00:42:10.505 +NULL 17125 2236-07-14 01:54:40.927230276 +NULL 17125 2629-11-15 15:34:52 +NULL 21181 2253-03-12 11:55:48.332 +NULL 21181 2434-02-20 00:46:29.633 +NULL 21436 2526-09-22 23:44:55 +NULL 21436 2696-05-08 05:19:24.112 +NULL 24870 2752-12-26 12:32:23.03685163 +NULL 2632 2561-12-15 15:42:27 +NULL 26484 1919-03-04 07:32:37.519 +NULL 26484 2953-03-10 02:05:26.508953676 +NULL 2748 2298-06-20 21:01:24 +NULL 2748 2759-02-13 18:04:36.000307355 +NULL 2748 2862-04-20 13:12:39.482805897 +NULL 29407 2385-12-14 06:03:39.597 +NULL 3198 2223-04-14 13:20:49 +NULL 3198 2428-06-13 16:21:33.955 +NULL 3198 2736-12-20 03:59:50.343550301 +NULL 4510 2293-01-17 13:47:41.00001006 +NULL 4510 2777-03-24 03:44:28.000169723 +NULL NULL 2124-05-07 15:01:19.021 +NULL NULL 2933-06-20 11:48:09.000839488 +NULL NULL 2971-08-07 12:02:11.000948152 +NULL NULL NULL +PREHOOK: query: CREATE TABLE fullouter_long_big_1c(key int, b_string string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1c +POSTHOOK: query: CREATE TABLE fullouter_long_big_1c(key int, b_string string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1c +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1c +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1c.txt' OVERWRITE INTO TABLE fullouter_long_big_1c +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1c +PREHOOK: query: CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: 
database:default +PREHOOK: Output: default@fullouter_long_small_1c +POSTHOOK: query: CREATE TABLE fullouter_long_small_1c(key int, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1c +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1c +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1c.txt' OVERWRITE INTO TABLE fullouter_long_small_1c +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1c +PREHOOK: query: analyze table fullouter_long_big_1c compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Output: default@fullouter_long_big_1c +POSTHOOK: query: analyze table fullouter_long_big_1c compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Output: default@fullouter_long_big_1c +PREHOOK: query: analyze table fullouter_long_big_1c compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Output: default@fullouter_long_big_1c +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1c compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Output: default@fullouter_long_big_1c +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1c compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1c +PREHOOK: Output: default@fullouter_long_small_1c +POSTHOOK: query: analyze table fullouter_long_small_1c compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1c +POSTHOOK: Output: default@fullouter_long_small_1c +PREHOOK: query: analyze table fullouter_long_small_1c compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1c +PREHOOK: Output: default@fullouter_long_small_1c +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1c compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1c +POSTHOOK: Output: default@fullouter_long_small_1c +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num 
rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:b_string:string, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: key (type: int), b_string (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 0:int, col 1:string + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string, 2:int, 3:decimal(38,18) + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: decimal(38,18)) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, b_string:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, decimal(38,18)] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:s_decimal:decimal(38,18), 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: key (type: int), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: 
true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:decimal(38,18) + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(38,18)) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:decimal(38,18) + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(38,18)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, s_decimal:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:string, VALUE._col1:int, VALUE._col2:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, decimal(38,18)] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 0:int, col 1:string + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + fullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string, 2:int, 3:decimal(38,18) + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: decimal(38,18)) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +-1437463633 JU NULL NULL +-1437463633 NULL NULL NULL +-1437463633 SOWDWMS NULL NULL +-1437463633 TKTKGVGFW NULL NULL +-1437463633 YYXPPCH NULL NULL +1725068083 MKSCCE NULL NULL +1928928239 AMKTIWQ NULL NULL +1928928239 NULL NULL NULL +1928928239 NULL NULL NULL +1928928239 VAQHVRI NULL NULL +NULL ABBZ NULL NULL +NULL NULL -1093006502 -69.556658280000000000 +NULL NULL -1197550983 -0.558879692200000000 +NULL NULL -1197550983 0.100000000000000000 +NULL NULL -1197550983 71852.833867441261300000 +NULL NULL -1250662632 -544.554649000000000000 +NULL NULL -1250662632 5454127198.951479000000000000 +NULL NULL -1250662632 93104.000000000000000000 +NULL NULL -1264372462 
-6993985240226.000000000000000000 +NULL NULL -1264372462 -899.000000000000000000 +NULL NULL -1264372462 0.883000000000000000 +NULL NULL -1490239076 92253.232096000000000000 +NULL NULL -1681455031 -11105.372477000000000000 +NULL NULL -1681455031 -6.454300000000000000 +NULL NULL -1740848088 -9.157000000000000000 +NULL NULL -1740848088 0.506394259000000000 +NULL NULL -1740848088 901.441000000000000000 +NULL NULL -2048404259 -0.322296044625100000 +NULL NULL -2048404259 3939387044.100000000000000000 +NULL NULL -2123273881 -55.891980000000000000 +NULL NULL -2123273881 3.959000000000000000 +NULL NULL -243940373 -583.258000000000000000 +NULL NULL -243940373 -97176129669.654953000000000000 +NULL NULL -369457052 560.119078830904550000 +NULL NULL -369457052 7.700000000000000000 +NULL NULL -424713789 0.480000000000000000 +NULL NULL -466171792 0.000000000000000000 +NULL NULL -466171792 4227.534400000000000000 +NULL NULL -466171792 69.900000000000000000 +NULL NULL -477147437 6.000000000000000000 +NULL NULL -793950320 -0.100000000000000000 +NULL NULL -793950320 -16.000000000000000000 +NULL NULL -934092157 -7843850349.571300380000000000 +NULL NULL -99948814 -38076694.398100000000000000 +NULL NULL -99948814 -96386.438000000000000000 +NULL NULL 1039864870 0.700000000000000000 +NULL NULL 1039864870 94.040000000000000000 +NULL NULL 1039864870 987601.570000000000000000 +NULL NULL 1091836730 -5017.140000000000000000 +NULL NULL 1091836730 0.020000000000000000 +NULL NULL 1242586043 -4.000000000000000000 +NULL NULL 1242586043 -749975924224.630000000000000000 +NULL NULL 1242586043 71.148500000000000000 +NULL NULL 1479580778 92077343080.700000000000000000 +NULL NULL 150678276 -8278.000000000000000000 +NULL NULL 150678276 15989394.843600000000000000 +NULL NULL 1519948464 152.000000000000000000 +NULL NULL 1561921421 -5.405000000000000000 +NULL NULL 1561921421 53050.550000000000000000 +NULL NULL 1585021913 -5762331.066971120000000000 +NULL NULL 1585021913 607.227470000000000000 +NULL NULL 1585021913 745222.668089540000000000 +NULL NULL 1719049112 -7888197.000000000000000000 +NULL NULL 1738753776 -99817635066320.241600000000000000 +NULL NULL 1738753776 1525.280459649262000000 +NULL NULL 1755897735 -39.965207000000000000 +NULL NULL 1785750809 47443.115000000000000000 +NULL NULL 1801735854 -1760956929364.267000000000000000 +NULL NULL 1801735854 -438541294.700000000000000000 +NULL NULL 1816559437 -1035.700900000000000000 +NULL NULL 1909136587 -8610.078036935181000000 +NULL NULL 1909136587 181.076815359440000000 +NULL NULL 193709887 -0.566300000000000000 +NULL NULL 193709887 -19889.830000000000000000 +NULL NULL 193709887 0.800000000000000000 +NULL NULL 284554389 5.727146000000000000 +NULL NULL 294598722 -3542.600000000000000000 +NULL NULL 294598722 -9377326244.444000000000000000 +NULL NULL 448130683 -4302.485366846491000000 +NULL NULL 452719211 3020.293893074463600000 +NULL NULL 452719211 83003.437220000000000000 +NULL NULL 466567142 -58810.605860000000000000 +NULL NULL 466567142 -9763217822.129028000000000000 +NULL NULL 466567142 196.578529539858400000 +NULL NULL 560745412 678.250000000000000000 +NULL NULL 698032489 -330457.429262583900000000 +NULL NULL 891262439 -0.040000000000000000 +NULL NULL 90660785 -4564.517185000000000000 +NULL NULL 90660785 12590.288613000000000000 +NULL NULL NULL 1.089120893565337000 +NULL NULL NULL 4.261652270000000000 +NULL NULL NULL 682070836.264960300000000000 +PREHOOK: query: CREATE TABLE fullouter_long_big_1d(key int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE 
+PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_big_1d +POSTHOOK: query: CREATE TABLE fullouter_long_big_1d(key int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_big_1d +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_big_1d +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_big_1d.txt' OVERWRITE INTO TABLE fullouter_long_big_1d +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_big_1d +PREHOOK: query: CREATE TABLE fullouter_long_small_1d(key int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_long_small_1d +POSTHOOK: query: CREATE TABLE fullouter_long_small_1d(key int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_long_small_1d +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_long_small_1d +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_long_small_1d.txt' OVERWRITE INTO TABLE fullouter_long_small_1d +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_long_small_1d +PREHOOK: query: analyze table fullouter_long_big_1d compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Output: default@fullouter_long_big_1d +POSTHOOK: query: analyze table fullouter_long_big_1d compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Output: default@fullouter_long_big_1d +PREHOOK: query: analyze table fullouter_long_big_1d compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Output: default@fullouter_long_big_1d +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_big_1d compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Output: default@fullouter_long_big_1d +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_long_small_1d compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1d +PREHOOK: Output: default@fullouter_long_small_1d +POSTHOOK: query: analyze table fullouter_long_small_1d compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1d +POSTHOOK: Output: default@fullouter_long_small_1d +PREHOOK: query: analyze table fullouter_long_small_1d compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_small_1d +PREHOOK: Output: default@fullouter_long_small_1d +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_long_small_1d compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_small_1d +POSTHOOK: Output: default@fullouter_long_small_1d +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b 
FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 5 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:int + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + 
rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 5 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + fullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:int + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +-1780951928 NULL +-2038654700 -2038654700 +-670834064 NULL +-702028721 NULL +-702028721 NULL +-702028721 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +NULL -1003639073 +NULL -1014271154 +NULL -1036083124 +NULL -1210744742 +NULL -1323620496 +NULL -1379355738 +NULL -1712018127 +NULL -1792852276 +NULL -1912571616 +NULL -497171161 +NULL -683339273 +NULL -707688773 +NULL -747044796 +NULL -894799664 +NULL -932176731 +NULL 103640700 +NULL 1164387380 +NULL 1372592319 +NULL 1431997749 +NULL 1614287784 +NULL 162858059 +NULL 1635405412 +NULL 1685473722 +NULL 1780951928 +NULL 1825107160 +NULL 1831520491 +NULL 1840266070 +NULL 1997943409 +NULL 2119085509 +NULL 246169862 +NULL 260588085 +NULL 
41376947 +NULL 436878811 +NULL 533298451 +NULL 670834064 +NULL 699007128 +NULL 699863556 +NULL NULL +NULL NULL +NULL NULL +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a +POSTHOOK: Lineage: fullouter_multikey_big_1a.key0 SIMPLE [(fullouter_multikey_big_1a_txt)fullouter_multikey_big_1a_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1a.key1 SIMPLE [(fullouter_multikey_big_1a_txt)fullouter_multikey_big_1a_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull +POSTHOOK: query: CREATE TABLE 
fullouter_multikey_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Lineage: fullouter_multikey_big_1a_nonull.key0 SIMPLE [(fullouter_multikey_big_1a_nonull_txt)fullouter_multikey_big_1a_nonull_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1a_nonull.key1 SIMPLE [(fullouter_multikey_big_1a_nonull_txt)fullouter_multikey_big_1a_nonull_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_small_1a_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_small_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_small_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a +POSTHOOK: Lineage: fullouter_multikey_small_1a.key0 SIMPLE [(fullouter_multikey_small_1a_txt)fullouter_multikey_small_1a_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1a.key1 SIMPLE [(fullouter_multikey_small_1a_txt)fullouter_multikey_small_1a_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull_txt(key0 smallint, key1 int) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL 
INPATH '../../data/files/fullouter_multikey_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull +POSTHOOK: Lineage: fullouter_multikey_small_1a_nonull.key0 SIMPLE [(fullouter_multikey_small_1a_nonull_txt)fullouter_multikey_small_1a_nonull_txt.FieldSchema(name:key0, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1a_nonull.key1 SIMPLE [(fullouter_multikey_small_1a_nonull_txt)fullouter_multikey_small_1a_nonull_txt.FieldSchema(name:key1, type:int, comment:null), ] +PREHOOK: query: analyze table fullouter_multikey_big_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Output: default@fullouter_multikey_big_1a +POSTHOOK: query: analyze table fullouter_multikey_big_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Output: default@fullouter_multikey_big_1a +PREHOOK: query: analyze table fullouter_multikey_big_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Output: default@fullouter_multikey_big_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_big_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Output: default@fullouter_multikey_big_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull +POSTHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull +PREHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Output: default@fullouter_multikey_big_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_big_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_big_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_small_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a +PREHOOK: Output: default@fullouter_multikey_small_1a +POSTHOOK: query: analyze table fullouter_multikey_small_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@fullouter_multikey_small_1a +POSTHOOK: Output: default@fullouter_multikey_small_1a +PREHOOK: query: analyze table fullouter_multikey_small_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a +PREHOOK: Output: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_small_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1a +POSTHOOK: Output: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull +POSTHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull +PREHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +PREHOOK: Output: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_small_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +POSTHOOK: Output: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:smallint, col 1:int + bigTableValueExpressions: col 0:smallint, col 1:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: 
hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:smallint, 3:int + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key0:smallint, key1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + 
keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key0:smallint, key1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int, VALUE._col0:smallint, VALUE._col1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: smallint), _col1 (type: int) + 1 _col0 (type: smallint), _col1 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:smallint, col 1:int + bigTableValueExpressions: col 0:smallint, col 1:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No 
nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + fullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:smallint, 3:int + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 
-63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +NULL NULL NULL NULL +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL 
NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 
628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL NULL +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 
+NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1b_txt(key0 timestamp, key1 smallint, key2 string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_big_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_big_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_big_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_big_1b +POSTHOOK: query: CREATE TABLE fullouter_multikey_big_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_big_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_big_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_big_1b +POSTHOOK: Lineage: fullouter_multikey_big_1b.key0 SIMPLE [(fullouter_multikey_big_1b_txt)fullouter_multikey_big_1b_txt.FieldSchema(name:key0, type:timestamp, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1b.key1 SIMPLE [(fullouter_multikey_big_1b_txt)fullouter_multikey_big_1b_txt.FieldSchema(name:key1, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_big_1b.key2 SIMPLE [(fullouter_multikey_big_1b_txt)fullouter_multikey_big_1b_txt.FieldSchema(name:key2, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, 
key2 string, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1b_txt(key0 timestamp, key1 smallint, key2 string, s_decimal decimal(38, 18)) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_multikey_small_1b.txt' OVERWRITE INTO TABLE fullouter_multikey_small_1b_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +PREHOOK: query: CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_multikey_small_1b_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_multikey_small_1b +POSTHOOK: query: CREATE TABLE fullouter_multikey_small_1b STORED AS ORC AS SELECT * FROM fullouter_multikey_small_1b_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_multikey_small_1b_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_multikey_small_1b +POSTHOOK: Lineage: fullouter_multikey_small_1b.key0 SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:key0, type:timestamp, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1b.key1 SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:key1, type:smallint, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1b.key2 SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:key2, type:string, comment:null), ] +POSTHOOK: Lineage: fullouter_multikey_small_1b.s_decimal SIMPLE [(fullouter_multikey_small_1b_txt)fullouter_multikey_small_1b_txt.FieldSchema(name:s_decimal, type:decimal(38,18), comment:null), ] +PREHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b_txt +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +POSTHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b_txt +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +PREHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b_txt +PREHOOK: Output: default@fullouter_multikey_big_1b_txt +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_big_1b_txt compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b_txt +POSTHOOK: Output: default@fullouter_multikey_big_1b_txt +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1b_txt +PREHOOK: Output: 
default@fullouter_multikey_small_1b_txt +POSTHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1b_txt +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +PREHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_small_1b_txt +PREHOOK: Output: default@fullouter_multikey_small_1b_txt +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_multikey_small_1b_txt compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_small_1b_txt +POSTHOOK: Output: default@fullouter_multikey_small_1b_txt +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:timestamp, 1:key1:smallint, 2:key2:string, 3:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>] + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + 1 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Map Join Vectorization: + bigTableKeyExpressions: col 0:timestamp, col 1:smallint, col 2:string + bigTableValueExpressions: col 0:timestamp, col 1:smallint, col 2:string + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + input vertices: + 1 Map 5 + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: 
timestamp), _col1 (type: smallint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:timestamp, 1:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:string, 3:timestamp, 4:smallint, 5:string, 6:decimal(38,18) + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: timestamp), _col4 (type: smallint), _col5 (type: string), _col6 (type: decimal(38,18)) + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:timestamp, 1:smallint, 2:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key0:timestamp, key1:smallint, key2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [timestamp, bigint, string, decimal(38,18)] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:timestamp, 1:key1:smallint, 2:key2:string, 3:s_decimal:decimal(38,18), 4:ROW__ID:struct] + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:timestamp, 1:smallint, 2:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 3:decimal(38,18) + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(38,18)) + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + 
keyColumns: 0:timestamp, 1:smallint, 2:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 3:decimal(38,18) + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(38,18)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + includeColumns: [0, 1, 2, 3] + dataColumns: key0:timestamp, key1:smallint, key2:string, s_decimal:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 7 + dataColumns: KEY.reducesinkkey0:timestamp, KEY.reducesinkkey1:smallint, VALUE._col0:string, VALUE._col1:timestamp, VALUE._col2:smallint, VALUE._col3:string, VALUE._col4:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), VALUE._col0 (type: string), VALUE._col1 (type: timestamp), VALUE._col2 (type: smallint), VALUE._col3 (type: string), VALUE._col4 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:timestamp, KEY.reducesinkkey1:smallint, KEY.reducesinkkey2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [timestamp, bigint, string, decimal(38,18)] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Map Join 
Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + 1 _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Map Join Vectorization: + bigTableKeyExpressions: col 0:timestamp, col 1:smallint, col 2:string + bigTableValueExpressions: col 0:timestamp, col 1:smallint, col 2:string + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + input vertices: + 1 Map 5 + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + fullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:timestamp, 1:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:string, 3:timestamp, 4:smallint, 5:string, 6:decimal(38,18) + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: timestamp), _col4 (type: smallint), _col5 (type: string), _col6 (type: decimal(38,18)) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1b +PREHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b +POSTHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 21635 ANCO NULL NULL NULL NULL +2082-07-14 04:00:40.695380469 12556 NCYBDW NULL NULL NULL NULL +2093-04-10 23:36:54.846 1446 GHZVPWFO NULL NULL NULL NULL +2093-04-10 23:36:54.846 28996 Q NULL NULL NULL NULL +2093-04-10 23:36:54.846 NULL NULL NULL NULL NULL NULL +2188-06-04 15:03:14.963259704 9468 AAA 2188-06-04 15:03:14.963259704 9468 AAA 2.754963520000000000 +2299-11-15 16:41:30.401 -31077 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 -6909 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 1446 NULL NULL NULL NULL NULL +2608-02-23 23:44:02.546440891 26184 NCYBDW NULL NULL NULL NULL +2686-05-23 07:46:46.565832918 13212 NCYBDW 2686-05-23 07:46:46.565832918 13212 NCYBDW -917116793.400000000000000000 +2686-05-23 07:46:46.565832918 NULL GHZVPWFO NULL NULL NULL NULL 
+2898-10-01 22:27:02.000871113 10361 NCYBDW NULL NULL NULL NULL +NULL -6909 NULL NULL NULL NULL NULL +NULL 21635 ANCO NULL NULL NULL NULL +NULL NULL CCWYD NULL NULL NULL NULL +NULL NULL NULL 1905-04-20 13:42:24.000469776 2638 KAUUFF 7.000000000000000000 +NULL NULL NULL 1919-06-20 00:16:50.611028595 20223 ZKBC -23.000000000000000000 +NULL NULL NULL 1931-12-04 11:13:47.269597392 23196 HVJCQMTQL -9697532.899400000000000000 +NULL NULL NULL 1941-10-16 02:19:35.000423663 -24459 AO -821445414.457971200000000000 +NULL NULL NULL 1957-02-01 14:00:28.000548421 -16085 ZVEUKC -2312.814900000000000000 +NULL NULL NULL 1957-03-06 09:57:31 -26373 NXLNNSO 2.000000000000000000 +NULL NULL NULL 1980-09-13 19:57:15 NULL M 57650.772300000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 7506645.953700000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 98790.713907420831000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -22419 LOTLS 342.372604022858400000 +NULL NULL NULL 2038-10-12 09:15:33.000539653 -19598 YKNIAJW -642807895924.660000000000000000 +NULL NULL NULL 2044-05-02 07:00:03.35 -8751 ZSMB -453797242.029791752000000000 +NULL NULL NULL 2071-07-21 20:02:32.000250697 2638 NRUV -66198.351092000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 5.000000000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 726945733.419300000000000000 +NULL NULL NULL 2075-10-25 20:32:40.000792874 NULL NULL 226612651968.360760000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR -394.086700000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR 67892053.023760940000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV -85184687349898.892000000000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 0.439686100000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 482.538341135921900000 +NULL NULL NULL 2105-01-04 16:27:45 23100 ZSMB -83.232800000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 NULL -9784.820000000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 UANGISEXR -5996.306000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -1515597428.000000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -4016.960800000000000000 +NULL NULL NULL 2201-07-05 17:22:06.084206844 -24459 UBGT 1.506948328200000000 +NULL NULL NULL 2238-05-17 19:27:25.519 20223 KQCM -0.010950000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ -0.261490000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ 37.728800000000000000 +NULL NULL NULL 2266-09-26 06:27:29.000284762 20223 EDYJJN 14.000000000000000000 +NULL NULL NULL 2301-06-03 17:16:19 15332 ZVEUKC 0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 -13125 JFYW 6.086657000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR 1279917802.420000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 12587 OPW -4.594895040000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T -0.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 2720.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 61.302000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G -4319470286240016.300000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G 975.000000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 30285 GSJPSIYOU 0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO 
-0.435500000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO 0.713517473350000000 +NULL NULL NULL 2309-01-15 12:43:49 22821 ZMY 40.900000000000000000 +NULL NULL NULL 2332-06-14 07:02:42.32 -26373 XFFFDTQ 56845106806308.900000000000000000 +NULL NULL NULL 2333-07-28 09:59:26 23196 RKSK 37872288434740893.500000000000000000 +NULL NULL NULL 2338-02-12 09:30:07 20223 CTH -6154.763054000000000000 +NULL NULL NULL 2340-12-15 05:15:17.133588982 23663 HHTP 33383.800000000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 74179461.880493000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 92.150000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -32.460000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -472.000000000000000000 +NULL NULL NULL 2391-01-17 15:28:37.00045143 16160 ZVEUKC 771355639420297.133000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB -5151598.347000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB 0.767183260000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -162.950000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -9926693851.000000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR 0.400000000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR -769088.176482000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR 93262.914526611000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -9575827.553960000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -991.436050000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB 8694.890000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB -582687.000000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB 67.417990000000000000 +NULL NULL NULL 2467-05-11 06:04:13.426693647 23196 EIBSDASR -8.554888380100000000 +NULL NULL NULL 2480-10-02 09:31:37.000770961 -26373 NBN -5875.519725200000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ -49.512190000000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ 0.445800000000000000 +NULL NULL NULL 2512-10-06 03:03:03 13195 CRJ 14.000000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X -922.695158410700000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X 761196.522000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 24313 QBHUG -8423.151573236000000000 +NULL NULL NULL 2512-10-06 03:03:03 32099 ARNZ -0.410000000000000000 +NULL NULL NULL 2525-05-12 15:59:35 -24459 SAVRGA 53106747151.863300000000000000 +NULL NULL NULL 2535-03-01 05:04:49.000525883 23663 ALIQKNXHE -0.166569100000000000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 41.774515077866460000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 6.801285170800000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -12923 PPTJPFR 5.400000000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -17786 HYEGQ -84.169614329419000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 7362887891522.378200000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 749563668434009.650000000000000000 +NULL NULL NULL 2668-06-25 07:12:37.000970744 2638 TJE -2.779682700000000000 +NULL NULL NULL 2688-02-06 20:58:42.000947837 20223 PAIY 67661.735000000000000000 +NULL NULL NULL 2743-12-27 05:16:19.000573579 -12914 ZVEUKC -811984611.517849700000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 368.000000000000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 60.602579700000000000 +NULL NULL NULL 2808-07-09 
02:10:11.928498854 -19598 FHFX 0.300000000000000000 +NULL NULL NULL 2829-06-04 08:01:47.836 22771 ZVEUKC 94317.753180000000000000 +NULL NULL NULL 2861-05-27 07:13:01.000848622 -19598 WKPXNLXS 29399.000000000000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -4244.926206619000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -9951044.000000000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC -56082455.033918000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC 57.621752577880370000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 51.732330327300000000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 6370.000000000000000000 +NULL NULL NULL 2898-12-18 03:37:17 -24459 MHNBXPBM 14.236693562384810000 +NULL NULL NULL 2913-07-17 15:06:58.041 -10206 NULL -0.200000000000000000 +NULL NULL NULL 2938-12-21 23:35:59.498 29362 ZMY 0.880000000000000000 +NULL NULL NULL 2957-05-07 10:41:46 20223 OWQT -586953.153681000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF -96.300000000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF 2.157765900000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -18138 VDPN 8924831210.427680190000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -32485 AGEPWWLJF -48431309405.652522000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -375994644577.315257000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -81.000000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ 9.178000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 14500 WXLTRFQP -23.819800000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 6689 TFGVOGPJF -0.010000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -27394351.300000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -5.751278023000000000 +NULL NULL NULL NULL -12914 ZVEUKC 221.000000000000000000 +NULL NULL NULL NULL NULL NULL -2.400000000000000000 +NULL NULL NULL NULL NULL NULL -2207.300000000000000000 +NULL NULL NULL NULL NULL NULL NULL +PREHOOK: query: CREATE TABLE fullouter_string_big_1a_txt(key string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a_txt(key string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_big_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_big_1a_txt +PREHOOK: query: CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_big_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: 
default@fullouter_string_big_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a +POSTHOOK: Lineage: fullouter_string_big_1a.key SIMPLE [(fullouter_string_big_1a_txt)fullouter_string_big_1a_txt.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull_txt(key string) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull_txt(key string) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_big_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_big_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_big_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_big_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_big_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_big_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_string_big_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_big_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_big_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_big_1a_nonull +POSTHOOK: Lineage: fullouter_string_big_1a_nonull.key SIMPLE [(fullouter_string_big_1a_nonull_txt)fullouter_string_big_1a_nonull_txt.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a_txt +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_small_1a_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_small_1a_txt +PREHOOK: query: CREATE TABLE fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_small_1a_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a +POSTHOOK: query: CREATE TABLE 
fullouter_string_small_1a STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_small_1a_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a +POSTHOOK: Lineage: fullouter_string_small_1a.key SIMPLE [(fullouter_string_small_1a_txt)fullouter_string_small_1a_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a.s_date SIMPLE [(fullouter_string_small_1a_txt)fullouter_string_small_1a_txt.FieldSchema(name:s_date, type:date, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a.s_timestamp SIMPLE [(fullouter_string_small_1a_txt)fullouter_string_small_1a_txt.FieldSchema(name:s_timestamp, type:timestamp, comment:null), ] +PREHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a_nonull_txt +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull_txt(key string, s_date date, s_timestamp timestamp) +row format delimited fields terminated by ',' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a_nonull_txt +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@fullouter_string_small_1a_nonull_txt +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/fullouter_string_small_1a_nonull.txt' OVERWRITE INTO TABLE fullouter_string_small_1a_nonull_txt +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@fullouter_string_small_1a_nonull_txt +PREHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt +PREHOOK: type: CREATETABLE_AS_SELECT +PREHOOK: Input: default@fullouter_string_small_1a_nonull_txt +PREHOOK: Output: database:default +PREHOOK: Output: default@fullouter_string_small_1a_nonull +POSTHOOK: query: CREATE TABLE fullouter_string_small_1a_nonull STORED AS ORC AS SELECT * FROM fullouter_string_small_1a_nonull_txt +POSTHOOK: type: CREATETABLE_AS_SELECT +POSTHOOK: Input: default@fullouter_string_small_1a_nonull_txt +POSTHOOK: Output: database:default +POSTHOOK: Output: default@fullouter_string_small_1a_nonull +POSTHOOK: Lineage: fullouter_string_small_1a_nonull.key SIMPLE [(fullouter_string_small_1a_nonull_txt)fullouter_string_small_1a_nonull_txt.FieldSchema(name:key, type:string, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a_nonull.s_date SIMPLE [(fullouter_string_small_1a_nonull_txt)fullouter_string_small_1a_nonull_txt.FieldSchema(name:s_date, type:date, comment:null), ] +POSTHOOK: Lineage: fullouter_string_small_1a_nonull.s_timestamp SIMPLE [(fullouter_string_small_1a_nonull_txt)fullouter_string_small_1a_nonull_txt.FieldSchema(name:s_timestamp, type:timestamp, comment:null), ] +PREHOOK: query: analyze table fullouter_string_big_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Output: default@fullouter_string_big_1a +POSTHOOK: query: analyze table fullouter_string_big_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: 
Output: default@fullouter_string_big_1a +PREHOOK: query: analyze table fullouter_string_big_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Output: default@fullouter_string_big_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_big_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Output: default@fullouter_string_big_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Output: default@fullouter_string_big_1a_nonull +POSTHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Output: default@fullouter_string_big_1a_nonull +PREHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Output: default@fullouter_string_big_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_big_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Output: default@fullouter_string_big_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_string_small_1a compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a +PREHOOK: Output: default@fullouter_string_small_1a +POSTHOOK: query: analyze table fullouter_string_small_1a compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a +POSTHOOK: Output: default@fullouter_string_small_1a +PREHOOK: query: analyze table fullouter_string_small_1a compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a +PREHOOK: Output: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_small_1a compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a +POSTHOOK: Output: default@fullouter_string_small_1a +#### A masked pattern was here #### +PREHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a_nonull +PREHOOK: Output: default@fullouter_string_small_1a_nonull +POSTHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +POSTHOOK: Output: default@fullouter_string_small_1a_nonull +PREHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics for columns +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_small_1a_nonull +PREHOOK: Output: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: analyze table fullouter_string_small_1a_nonull compute statistics for columns +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +POSTHOOK: Output: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN 
fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + bigTableKeyExpressions: col 0:string + bigTableValueExpressions: col 0:string + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string, 2:date, 3:timestamp + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + 
usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:string + partitionColumnCount: 0 + scratchColumnTypeNames: [string, bigint, timestamp] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:s_date:date, 2:s_timestamp:timestamp, 3:ROW__ID:struct] + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date, 2:timestamp + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date, 2:timestamp + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:string, s_date:date, s_timestamp:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, VALUE._col0:string, VALUE._col1:date, VALUE._col2:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + 
projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:string + partitionColumnCount: 0 + scratchColumnTypeNames: [string, bigint, timestamp] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + Map Join Vectorization: + bigTableKeyExpressions: col 0:string + bigTableValueExpressions: col 0:string + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 5 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + fullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string, 2:date, 3:timestamp + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 
2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 
2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL 
WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + 
Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:ROW__ID:struct] + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:s_date:date, 2:ROW__ID:struct] + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:bigint, s_date:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: 
vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: bigint) + 1 KEY.reducesinkkey0 (type: bigint) + Map Join Vectorization: + bigTableKeyExpressions: col 0:bigint + bigTableValueExpressions: col 0:bigint + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 4 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:bigint, 2:date + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:bigint, VALUE._col0:bigint, VALUE._col1:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: 
default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked 
pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:ROW__ID:struct] + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:s_date:date, 2:ROW__ID:struct] + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date + Statistics: Num rows: 54 Data size: 3432 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:bigint, s_date:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS 
true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: bigint) + 1 KEY.reducesinkkey0 (type: bigint) + Map Join Vectorization: + bigTableKeyExpressions: col 0:bigint + bigTableValueExpressions: col 0:bigint + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 4 + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:bigint, 2:date + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:bigint, VALUE._col0:bigint, VALUE._col1:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 59 Data size: 3775 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM 
fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL 2024-01-23 +NULL NULL 2098-02-10 +NULL NULL 2242-02-08 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + 
TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:ROW__ID:struct] + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 11 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:s_date:date, 2:ROW__ID:struct] + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:bigint, s_date:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + 
dataColumns: KEY.reducesinkkey0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: bigint) + 1 KEY.reducesinkkey0 (type: bigint) + Map Join Vectorization: + bigTableKeyExpressions: col 0:bigint + bigTableValueExpressions: col 0:bigint + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 4 + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:bigint, 2:date + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:bigint, VALUE._col0:bigint, VALUE._col1:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a +POSTHOOK: 
Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:ROW__ID:struct] + Select Operator + expressions: key (type: bigint) + outputColumnNames: _col0 + Select Vectorization: + className: 
VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:bigint, 1:s_date:date, 2:ROW__ID:struct] + Select Operator + expressions: key (type: bigint), s_date (type: date) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Map-reduce partition columns: _col0 (type: bigint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date + Statistics: Num rows: 51 Data size: 3264 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:bigint, s_date:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 
KEY.reducesinkkey0 (type: bigint) + 1 KEY.reducesinkkey0 (type: bigint) + Map Join Vectorization: + bigTableKeyExpressions: col 0:bigint + bigTableValueExpressions: col 0:bigint + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 4 + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:bigint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:bigint, 2:date + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: date) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:bigint, VALUE._col0:bigint, VALUE._col1:date + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: date) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 56 Data size: 3590 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1a_nonull +PREHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date FROM fullouter_long_big_1a_nonull b FULL OUTER JOIN fullouter_long_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1a_nonull +POSTHOOK: Input: default@fullouter_long_small_1a_nonull +#### A masked pattern was here #### +-5206670856103795573 NULL NULL +-5310365297525168078 NULL NULL +-6187919478609154811 NULL NULL 
+-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-6187919478609154811 NULL NULL +-8460550397108077433 NULL NULL +1569543799237464101 NULL NULL +3313583664488247651 NULL NULL +968819023021777205 NULL NULL +NULL -1339636982994067311 2000-06-20 +NULL -1339636982994067311 2008-12-03 +NULL -2098090254092150988 1817-03-12 +NULL -2098090254092150988 2163-05-26 +NULL -2098090254092150988 2219-12-23 +NULL -2184423060953067642 1853-07-06 +NULL -2184423060953067642 1880-10-06 +NULL -2575185053386712613 1809-07-12 +NULL -2575185053386712613 2105-01-21 +NULL -2688622006344936758 1948-10-15 +NULL -2688622006344936758 2129-01-11 +NULL -327698348664467755 2222-10-15 +NULL -3655445881497026796 2108-08-16 +NULL -4224290881682877258 1813-05-17 +NULL -4224290881682877258 2120-01-16 +NULL -4224290881682877258 2185-07-08 +NULL -4961171400048338491 2196-08-10 +NULL -5706981533666803767 1800-09-20 +NULL -5706981533666803767 2151-06-09 +NULL -5754527700632192146 1958-07-15 +NULL -614848861623872247 2101-05-25 +NULL -614848861623872247 2112-11-09 +NULL -6784441713807772877 1845-02-16 +NULL -6784441713807772877 2054-06-17 +NULL -7707546703881534780 2134-08-20 +NULL 214451696109242839 1855-05-12 +NULL 214451696109242839 1977-01-04 +NULL 214451696109242839 2179-04-18 +NULL 2438535236662373438 1881-09-16 +NULL 2438535236662373438 1916-01-10 +NULL 2438535236662373438 2026-06-23 +NULL 3845554233155411208 1805-11-10 +NULL 3845554233155411208 2264-04-05 +NULL 3873405809071478736 1918-11-20 +NULL 3873405809071478736 2034-06-09 +NULL 3873405809071478736 2164-04-23 +NULL 3905351789241845882 1866-07-28 +NULL 3905351789241845882 2045-12-05 +NULL 434940853096155515 2275-02-08 +NULL 4436884039838843341 2031-05-23 +NULL 5246983111579595707 1817-07-01 +NULL 5246983111579595707 2260-05-11 +NULL 5252407779338300447 2039-03-10 +NULL 5252407779338300447 2042-04-26 +NULL 6049335087268933751 2086-12-17 +NULL 6049335087268933751 2282-06-09 +NULL 7297177530102477725 1921-05-11 +NULL 7297177530102477725 1926-04-12 +NULL 7297177530102477725 2125-08-26 +NULL 7937120928560087303 2083-03-14 +NULL 8755921538765428593 1827-05-01 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:smallint, 1:ROW__ID:struct] + Select Operator + expressions: key (type: smallint) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) 
+ sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 63 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:smallint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:smallint, 1:s_timestamp:timestamp, 2:ROW__ID:struct] + Select Operator + expressions: key (type: smallint), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Map-reduce partition columns: _col0 (type: smallint) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:timestamp + Statistics: Num rows: 72 Data size: 2208 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:smallint, s_timestamp:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:smallint + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, timestamp] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + 
keys: + 0 KEY.reducesinkkey0 (type: smallint) + 1 KEY.reducesinkkey0 (type: smallint) + Map Join Vectorization: + bigTableKeyExpressions: col 0:smallint + bigTableValueExpressions: col 0:smallint + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2 + input vertices: + 1 Map 4 + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:smallint, 2:timestamp + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: smallint), _col2 (type: timestamp) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:smallint, VALUE._col0:smallint, VALUE._col1:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), VALUE._col0 (type: smallint), VALUE._col1 (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 79 Data size: 2428 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1b +PREHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_timestamp FROM fullouter_long_big_1b b FULL OUTER JOIN fullouter_long_small_1b s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1b +POSTHOOK: Input: default@fullouter_long_small_1b +#### A masked pattern was here #### +-25394 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL 
+31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +31713 NULL NULL +32030 32030 2101-09-09 07:35:05.145 +NULL -14172 1918-09-13 11:44:24.496926711 +NULL -14172 2355-01-14 23:23:34 +NULL -14172 2809-06-07 02:10:58 +NULL -15361 2219-09-15 20:15:03.000169887 +NULL -15361 2434-08-13 20:37:07.000172979 +NULL -15427 2023-11-09 19:31:21 +NULL -15427 2046-06-07 22:58:40.728 +NULL -15427 2355-01-08 12:34:11.617 +NULL -19167 2230-12-22 20:25:39.000242111 +NULL -19167 2319-08-26 11:07:11.268 +NULL -20517 2233-12-20 04:06:56.666522799 +NULL -20517 2774-06-23 12:04:06.5 +NULL -20824 2478-11-05 00:28:05 +NULL -22422 1949-03-13 00:07:53.075 +NULL -22422 2337-07-19 06:33:02.000353352 +NULL -22422 2982-12-28 06:30:26.000883228 +NULL -23117 2037-01-05 21:52:30.685952759 +NULL -24775 2035-03-26 08:11:23.375224153 +NULL -24775 2920-08-06 15:58:28.261059449 +NULL -26998 2268-08-04 12:48:11.848006292 +NULL -26998 2428-12-26 07:53:45.96925825 +NULL -26998 2926-07-18 09:02:46.077 +NULL -29600 2333-11-02 15:06:30 +NULL -30059 2269-05-04 21:23:44.000339209 +NULL -30059 2420-12-10 22:12:30 +NULL -30059 2713-10-13 09:28:49 +NULL -30306 2619-05-24 10:35:58.000774018 +NULL -4279 2214-09-10 03:53:06 +NULL -4279 2470-08-12 11:21:14.000955747 +NULL -7373 2662-10-28 12:07:02.000526564 +NULL -7624 2219-12-03 17:07:19 +NULL -7624 2289-08-28 00:14:34 +NULL -7624 2623-03-20 03:18:45.00006465 +NULL -8087 2550-06-26 23:57:42.588007617 +NULL -8087 2923-07-02 11:40:26.115 +NULL -8435 2642-02-07 11:45:04.353231638 +NULL -8435 2834-12-06 16:38:18.901 +NULL -8624 2120-02-15 15:36:40.000758423 +NULL -8624 2282-03-28 07:58:16 +NULL -8624 2644-05-04 04:45:07.839 +NULL 10553 2168-05-05 21:10:59.000152113 +NULL 11232 2038-04-06 14:53:59 +NULL 11232 2507-01-27 22:04:22.49661421 +NULL 11232 2533-11-26 12:22:18 +NULL 13598 2421-05-20 14:18:31.000264698 +NULL 13598 2909-06-25 23:22:50 +NULL 14865 2079-10-06 16:54:35.117 +NULL 14865 2220-02-28 03:41:36 +NULL 14865 2943-03-21 00:42:10.505 +NULL 17125 2236-07-14 01:54:40.927230276 +NULL 17125 2629-11-15 15:34:52 +NULL 21181 2253-03-12 11:55:48.332 +NULL 21181 2434-02-20 00:46:29.633 +NULL 21436 2526-09-22 23:44:55 +NULL 21436 2696-05-08 05:19:24.112 +NULL 24870 2752-12-26 12:32:23.03685163 +NULL 2632 2561-12-15 15:42:27 +NULL 26484 1919-03-04 07:32:37.519 +NULL 26484 2953-03-10 02:05:26.508953676 +NULL 2748 2298-06-20 21:01:24 +NULL 2748 2759-02-13 18:04:36.000307355 +NULL 2748 2862-04-20 13:12:39.482805897 +NULL 29407 2385-12-14 06:03:39.597 +NULL 3198 2223-04-14 13:20:49 +NULL 3198 2428-06-13 16:21:33.955 +NULL 3198 2736-12-20 03:59:50.343550301 +NULL 4510 2293-01-17 13:47:41.00001006 +NULL 4510 2777-03-24 03:44:28.000169723 +NULL NULL 2124-05-07 15:01:19.021 +NULL NULL 2933-06-20 11:48:09.000839488 +NULL NULL 2971-08-07 12:02:11.000948152 +NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + 
Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:b_string:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: int), b_string (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + Statistics: Num rows: 11 Data size: 173 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, b_string:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:s_decimal:decimal(38,18), 2:ROW__ID:struct] + Select Operator + expressions: key (type: int), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:decimal(38,18) + Statistics: Num rows: 81 Data size: 1703 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: decimal(38,18)) + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: 
org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, s_decimal:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, decimal(38,18)] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: int) + 1 KEY.reducesinkkey0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 0:int, col 1:string + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string, 2:int, 3:decimal(38,18) + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: decimal(38,18)) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:string, VALUE._col1:int, VALUE._col2:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 89 Data size: 1873 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1c +PREHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, b.b_string, s.key, s.s_decimal FROM fullouter_long_big_1c b FULL OUTER JOIN fullouter_long_small_1c s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1c +POSTHOOK: Input: default@fullouter_long_small_1c +#### A masked pattern was here #### +-1437463633 JU NULL NULL +-1437463633 NULL NULL NULL +-1437463633 SOWDWMS NULL NULL +-1437463633 TKTKGVGFW NULL NULL +-1437463633 YYXPPCH NULL NULL +1725068083 MKSCCE NULL NULL +1928928239 AMKTIWQ NULL NULL +1928928239 NULL NULL NULL +1928928239 NULL NULL NULL +1928928239 VAQHVRI NULL NULL +NULL ABBZ NULL NULL +NULL NULL -1093006502 -69.556658280000000000 +NULL NULL -1197550983 -0.558879692200000000 +NULL NULL -1197550983 0.100000000000000000 +NULL NULL -1197550983 71852.833867441261300000 +NULL NULL -1250662632 -544.554649000000000000 +NULL NULL -1250662632 5454127198.951479000000000000 +NULL NULL -1250662632 93104.000000000000000000 +NULL NULL -1264372462 -6993985240226.000000000000000000 +NULL NULL -1264372462 -899.000000000000000000 +NULL NULL -1264372462 0.883000000000000000 +NULL NULL -1490239076 92253.232096000000000000 +NULL NULL -1681455031 -11105.372477000000000000 +NULL NULL -1681455031 -6.454300000000000000 +NULL NULL -1740848088 -9.157000000000000000 +NULL NULL -1740848088 0.506394259000000000 +NULL NULL -1740848088 901.441000000000000000 +NULL NULL -2048404259 -0.322296044625100000 +NULL NULL -2048404259 3939387044.100000000000000000 +NULL NULL -2123273881 -55.891980000000000000 +NULL NULL -2123273881 3.959000000000000000 +NULL NULL -243940373 -583.258000000000000000 +NULL NULL -243940373 -97176129669.654953000000000000 +NULL NULL -369457052 560.119078830904550000 +NULL NULL -369457052 7.700000000000000000 +NULL NULL -424713789 0.480000000000000000 +NULL NULL -466171792 0.000000000000000000 +NULL NULL -466171792 4227.534400000000000000 +NULL NULL -466171792 69.900000000000000000 +NULL NULL -477147437 6.000000000000000000 +NULL NULL -793950320 -0.100000000000000000 +NULL NULL -793950320 -16.000000000000000000 +NULL NULL -934092157 -7843850349.571300380000000000 +NULL NULL -99948814 -38076694.398100000000000000 +NULL NULL -99948814 -96386.438000000000000000 +NULL NULL 1039864870 0.700000000000000000 +NULL NULL 1039864870 94.040000000000000000 +NULL NULL 1039864870 987601.570000000000000000 +NULL NULL 1091836730 -5017.140000000000000000 +NULL NULL 1091836730 0.020000000000000000 +NULL NULL 1242586043 -4.000000000000000000 +NULL NULL 1242586043 -749975924224.630000000000000000 +NULL NULL 1242586043 71.148500000000000000 +NULL NULL 1479580778 92077343080.700000000000000000 +NULL NULL 150678276 -8278.000000000000000000 +NULL NULL 150678276 15989394.843600000000000000 +NULL NULL 1519948464 152.000000000000000000 +NULL NULL 1561921421 -5.405000000000000000 +NULL NULL 1561921421 53050.550000000000000000 +NULL NULL 1585021913 -5762331.066971120000000000 +NULL NULL 1585021913 
607.227470000000000000 +NULL NULL 1585021913 745222.668089540000000000 +NULL NULL 1719049112 -7888197.000000000000000000 +NULL NULL 1738753776 -99817635066320.241600000000000000 +NULL NULL 1738753776 1525.280459649262000000 +NULL NULL 1755897735 -39.965207000000000000 +NULL NULL 1785750809 47443.115000000000000000 +NULL NULL 1801735854 -1760956929364.267000000000000000 +NULL NULL 1801735854 -438541294.700000000000000000 +NULL NULL 1816559437 -1035.700900000000000000 +NULL NULL 1909136587 -8610.078036935181000000 +NULL NULL 1909136587 181.076815359440000000 +NULL NULL 193709887 -0.566300000000000000 +NULL NULL 193709887 -19889.830000000000000000 +NULL NULL 193709887 0.800000000000000000 +NULL NULL 284554389 5.727146000000000000 +NULL NULL 294598722 -3542.600000000000000000 +NULL NULL 294598722 -9377326244.444000000000000000 +NULL NULL 448130683 -4302.485366846491000000 +NULL NULL 452719211 3020.293893074463600000 +NULL NULL 452719211 83003.437220000000000000 +NULL NULL 466567142 -58810.605860000000000000 +NULL NULL 466567142 -9763217822.129028000000000000 +NULL NULL 466567142 196.578529539858400000 +NULL NULL 560745412 678.250000000000000000 +NULL NULL 698032489 -330457.429262583900000000 +NULL NULL 891262439 -0.040000000000000000 +NULL NULL 90660785 -4564.517185000000000000 +NULL NULL 90660785 12590.288613000000000000 +NULL NULL NULL 1.089120893565337000 +NULL NULL NULL 4.261652270000000000 +NULL NULL NULL 682070836.264960300000000000 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:ROW__ID:struct] + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 12 Data size: 106 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 5 + Statistics: Num rows: 
42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:int + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Map 5 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:ROW__ID:struct] + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 39 Data size: 381 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 
disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, VALUE._col0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int), VALUE._col0 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 5 + Statistics: Num rows: 42 Data size: 419 Basic stats: COMPLETE Column stats: NONE + fullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:int + Statistics: Num rows: 42 Data 
size: 419 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_long_big_1d +PREHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key FROM fullouter_long_big_1d b FULL OUTER JOIN fullouter_long_small_1d s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_long_big_1d +POSTHOOK: Input: default@fullouter_long_small_1d +#### A masked pattern was here #### +-1780951928 NULL +-2038654700 -2038654700 +-670834064 NULL +-702028721 NULL +-702028721 NULL +-702028721 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +-814597051 NULL +NULL -1003639073 +NULL -1014271154 +NULL -1036083124 +NULL -1210744742 +NULL -1323620496 +NULL -1379355738 +NULL -1712018127 +NULL -1792852276 +NULL -1912571616 +NULL -497171161 +NULL -683339273 +NULL -707688773 +NULL -747044796 +NULL -894799664 +NULL -932176731 +NULL 103640700 +NULL 1164387380 +NULL 1372592319 +NULL 1431997749 +NULL 1614287784 +NULL 162858059 +NULL 1635405412 +NULL 1685473722 +NULL 1780951928 +NULL 1825107160 +NULL 1831520491 +NULL 1840266070 +NULL 1997943409 +NULL 2119085509 +NULL 246169862 +NULL 260588085 +NULL 41376947 +NULL 436878811 +NULL 533298451 +NULL 670834064 +NULL 699007128 +NULL 699863556 +NULL NULL +NULL NULL +NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data 
size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:s_date:date, 2:s_timestamp:timestamp, 3:ROW__ID:struct] + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date, 2:timestamp + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:string, s_date:date, s_timestamp:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:string + partitionColumnCount: 0 + scratchColumnTypeNames: [string, bigint, timestamp] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: string) + 1 KEY.reducesinkkey0 (type: string) + Map Join Vectorization: + bigTableKeyExpressions: col 0:string + bigTableValueExpressions: col 0:string + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: 
hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string, 2:date, 3:timestamp + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, VALUE._col0:string, VALUE._col1:date, VALUE._col2:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 
10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: 
vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:s_date:date, 2:s_timestamp:timestamp, 3:ROW__ID:struct] + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date, 2:timestamp + Statistics: Num rows: 38 Data size: 6606 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:string, s_date:date, s_timestamp:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:string + partitionColumnCount: 0 + scratchColumnTypeNames: [string, bigint, timestamp] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: string) + 1 KEY.reducesinkkey0 (type: string) + Map Join Vectorization: + bigTableKeyExpressions: col 0:string + bigTableValueExpressions: col 0:string + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, 
_col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string, 2:date, 3:timestamp + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, VALUE._col0:string, VALUE._col1:date, VALUE._col2:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 41 Data size: 7266 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 
08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL 1865-11-08 2893-04-07 07:36:12 +NULL NULL 1915-02-22 2554-10-27 09:34:30 +NULL NULL 2250-04-22 2548-03-21 08:23:13.133573801 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: 
true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:s_date:date, 2:s_timestamp:timestamp, 3:ROW__ID:struct] + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date, 2:timestamp + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:string, s_date:date, s_timestamp:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:string + partitionColumnCount: 0 + scratchColumnTypeNames: [string, bigint, timestamp] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: string) + 1 KEY.reducesinkkey0 (type: string) + Map Join Vectorization: + bigTableKeyExpressions: col 0:string + bigTableValueExpressions: col 0:string + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 38 
Data size: 6949 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string, 2:date, 3:timestamp + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, VALUE._col0:string, VALUE._col1:date, VALUE._col2:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 
2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL NULL NULL NULL +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 12 Data size: 1056 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + includeColumns: [0] + dataColumns: key:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:s_date:date, 2:s_timestamp:timestamp, 3:ROW__ID:struct] + Select Operator + expressions: key (type: string), s_date (type: date), s_timestamp (type: timestamp) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:date, 2:timestamp + Statistics: Num rows: 35 Data size: 6318 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: timestamp) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:string, s_date:date, s_timestamp:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:string + partitionColumnCount: 0 + scratchColumnTypeNames: [string, bigint, timestamp] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: string) + 1 KEY.reducesinkkey0 (type: string) + Map Join Vectorization: + bigTableKeyExpressions: col 0:string + bigTableValueExpressions: col 0:string + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: string) + 
sort order: + + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string, 2:date, 3:timestamp + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string), _col2 (type: date), _col3 (type: timestamp) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:string, VALUE._col0:string, VALUE._col1:date, VALUE._col2:timestamp + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: string), VALUE._col1 (type: date), VALUE._col2 (type: timestamp) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 38 Data size: 6949 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_string_big_1a_nonull +PREHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key, s.key, s.s_date, s.s_timestamp FROM fullouter_string_big_1a_nonull b FULL OUTER JOIN fullouter_string_small_1a_nonull s ON b.key = s.key +order by b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_string_big_1a_nonull +POSTHOOK: Input: default@fullouter_string_small_1a_nonull +#### A masked pattern was here #### +FTWURVH FTWURVH 1976-03-10 2683-11-22 13:07:04.66673556 +MXGDMBD MXGDMBD 1880-11-01 2765-10-06 13:28:17.000688592 +NULL 1985-01-22 2111-01-10 15:44:28 +NULL 2021-02-21 2802-04-21 18:48:18.5933838 +NULL AARNZRVZQ 2000-11-13 2309-06-05 19:54:13 +NULL AARNZRVZQ 2002-10-23 2525-05-12 15:59:35 +NULL ATZJTPECF 1829-10-16 2357-05-08 07:09:09.000482799 +NULL ATZJTPECF 2217-10-22 2808-10-20 16:01:24.558 +NULL BDBMW 2278-04-27 2101-02-21 08:53:34.692 +NULL BEP 2141-02-19 2521-06-09 01:20:07.121 +NULL BEP 2206-08-10 2331-10-09 10:59:51 +NULL CQMTQLI 2031-09-13 1927-02-13 08:39:24.000919094 +NULL CQMTQLI 2090-11-13 2693-03-17 16:19:55.82 +NULL FROPIK 2023-02-28 2467-05-11 06:04:13.426693647 +NULL FROPIK 2124-10-01 2974-07-06 12:05:08.000146048 +NULL FROPIK 2214-02-09 1949-08-18 
17:14:37.000703738 +NULL FYW 1807-03-20 2305-08-17 01:32:44 +NULL GOYJHW 1959-04-27 NULL +NULL GOYJHW 1976-03-06 2805-07-10 10:51:57.00083302 +NULL GOYJHW 1993-04-07 1950-05-04 09:28:21.000114784 +NULL GSJPSIYOU 1948-07-17 2006-09-24 16:01:24.000239251 +NULL IOQIDQBHU 2198-02-08 2073-03-21 15:32:57.617920888 +NULL IWEZJHKE NULL NULL +NULL KL 1980-09-22 2073-08-25 11:51:10.318 +NULL LOTLS 1957-11-09 2092-06-07 06:42:30.000538454 +NULL LOTLS 2099-08-04 2181-01-25 01:04:25.000030055 +NULL LOTLS 2126-09-16 1977-12-15 15:28:56 +NULL NADANUQMW 2037-10-19 2320-04-26 18:50:25.000426922 +NULL QTSRKSKB 2144-01-13 2627-12-20 03:38:53.000389266 +NULL SDA 2196-04-12 2462-10-26 19:28:12.733 +NULL VNRXWQ 1883-02-06 2287-07-17 16:46:58.287 +NULL VNRXWQ 2276-11-16 2072-08-16 17:45:47.48349887 +NULL WNGFTTY 1843-06-10 2411-01-28 20:03:59 +NULL WNGFTTY 2251-08-16 2649-12-21 18:30:42.498 +NULL ZNOUDCR NULL 1988-04-23 08:40:21 +PXLD NULL NULL NULL +PXLD NULL NULL NULL +PXLD NULL NULL NULL +QNCYBDW NULL NULL NULL +UA NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +WXHJ NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct] + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + 
allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key0:smallint, key1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct] + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key0:smallint, key1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + 1 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:smallint, col 1:int + bigTableValueExpressions: col 0:smallint, col 1:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + 
className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:smallint, 3:int + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int, VALUE._col0:smallint, VALUE._col1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL 
-20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +NULL NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct] + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce 
Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key0:smallint, key1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct] + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 92 Data size: 724 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key0:smallint, key1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + 1 
KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:smallint, col 1:int + bigTableValueExpressions: col 0:smallint, col 1:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:smallint, 3:int + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int, VALUE._col0:smallint, VALUE._col1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 101 Data size: 796 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: 
default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL 1082230084 +NULL NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: 
[hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct] + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 13 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key0:smallint, key1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct] + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: 
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key0:smallint, key1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + 1 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:smallint, col 1:int + bigTableValueExpressions: col 0:smallint, col 1:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:smallint, 3:int + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int, VALUE._col0:smallint, VALUE._col1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 99 Data size: 792 Basic 
stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +3556 NULL NULL NULL +NULL 1082230084 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 
2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +NULL NULL NULL NULL +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct] + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 10 Data size: 80 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key0:smallint, key1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:smallint, 1:key1:int, 2:ROW__ID:struct] + Select Operator + expressions: key0 (type: smallint), key1 (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 90 Data size: 720 
Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Map-reduce partition columns: _col0 (type: smallint), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 90 Data size: 720 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key0:smallint, key1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + 1 KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:smallint, col 1:int + bigTableValueExpressions: col 0:smallint, col 1:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 1 Map 4 + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: smallint), _col1 (type: int) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:smallint, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:smallint, 3:int + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: smallint), _col3 (type: int) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true 
+ reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:smallint, KEY.reducesinkkey1:int, VALUE._col0:smallint, VALUE._col1:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey1 (type: int), VALUE._col0 (type: smallint), VALUE._col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 99 Data size: 792 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: default@fullouter_multikey_big_1a_nonull +PREHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, s.key0, s.key1 FROM fullouter_multikey_big_1a_nonull b FULL OUTER JOIN fullouter_multikey_small_1a_nonull s ON b.key0 = s.key0 AND b.key1 = s.key1 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1a_nonull +POSTHOOK: Input: default@fullouter_multikey_small_1a_nonull +#### A masked pattern was here #### +-17582 -1730236061 NULL NULL +-17582 1082230084 NULL NULL +-17582 267529350 -17582 267529350 +-17582 827141667 NULL NULL +-17582 9637312 NULL NULL +-18222 -1969080993 NULL NULL +-6131 -1969080993 -6131 -1969080993 +1499 371855128 NULL NULL +22767 -1969080993 NULL NULL +3556 -1969080993 NULL NULL +NULL NULL -11868 -3536499 +NULL NULL -11868 -915441041 +NULL NULL -11868 1052120431 +NULL NULL -11868 1318114822 +NULL NULL -11868 1456809245 +NULL NULL -11868 1658440922 +NULL NULL -11868 930596435 +NULL NULL -11868 97203778 +NULL NULL -12252 1956403781 +NULL NULL -12252 964377504 +NULL NULL -15212 -2055239583 +NULL NULL -17788 -1361776766 +NULL NULL -17788 -738743861 +NULL NULL -17788 -872691214 +NULL NULL -17788 528419995 +NULL NULL -1787 -63842445 +NULL NULL -20125 -1995259010 +NULL NULL -20900 1078466156 +NULL NULL -22311 -2055239583 +NULL NULL -23457 -63842445 +NULL NULL -2407 1078466156 +NULL NULL -24206 -1456409156 +NULL NULL -24206 641361618 +NULL NULL -26894 -63842445 +NULL NULL -28129 -2055239583 +NULL NULL -28137 -63842445 +NULL NULL -28313 -706104224 +NULL NULL -28313 51228026 +NULL NULL -28313 837320573 +NULL NULL -4117 -1386947816 +NULL NULL -5734 1078466156 +NULL NULL -6061 -586336015 +NULL NULL -7386 -1635102480 +NULL NULL -7386 -2112062470 +NULL NULL -7386 100736776 +NULL NULL -980 -270600267 +NULL NULL -980 -333603940 +NULL NULL -980 -465544127 +NULL NULL -980 -801821285 +NULL NULL -980 1310479628 +NULL NULL -980 2009785365 +NULL NULL -980 356970043 +NULL NULL -980 628784462 +NULL NULL -980 
712692345 +NULL NULL 11460 1078466156 +NULL NULL 12089 -63842445 +NULL NULL 13672 -63842445 +NULL NULL 14400 -825652334 +NULL NULL 15061 -63842445 +NULL NULL 15404 1078466156 +NULL NULL 16166 931172175 +NULL NULL 16696 -63842445 +NULL NULL 20156 -1618478138 +NULL NULL 20156 1165375499 +NULL NULL 20156 1855042153 +NULL NULL 20156 963883665 +NULL NULL 20969 -1995259010 +NULL NULL 21186 -586336015 +NULL NULL 22934 -1695419330 +NULL NULL 23015 -1893013623 +NULL NULL 23015 -217613200 +NULL NULL 23015 -252525791 +NULL NULL 23015 -276888585 +NULL NULL 23015 -696928205 +NULL NULL 23015 -893234501 +NULL NULL 23015 258882280 +NULL NULL 23015 564751472 +NULL NULL 26738 -2055239583 +NULL NULL 26944 -1995259010 +NULL NULL 30353 -1007182618 +NULL NULL 30353 -1011627089 +NULL NULL 30353 -1507157031 +NULL NULL 30353 105613996 +NULL NULL 30353 1241923267 +NULL NULL 30353 1364268303 +NULL NULL 30353 2044473567 +NULL NULL 31443 -1968665833 +NULL NULL 3412 -1196037018 +NULL NULL 3412 -1249487623 +NULL NULL 3412 -2081156563 +NULL NULL 3412 -2132472060 +NULL NULL 3412 1253976194 +NULL NULL 3890 1411429004 +NULL NULL 4586 -586336015 +NULL NULL 4779 -1995259010 +NULL NULL 4902 1078466156 +NULL NULL 5957 -1995259010 +NULL NULL 8177 -1995259010 +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL +SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Map 4 (CUSTOM_SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:timestamp, 1:key1:smallint, 2:key2:string, 3:ROW__ID:struct] + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] + Statistics: Num rows: 17 Data size: 1729 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:timestamp, 1:smallint, 2:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Statistics: Num rows: 17 Data size: 
1729 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key0:timestamp, key1:smallint, key2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: s + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key0:timestamp, 1:key1:smallint, 2:key2:string, 3:s_decimal:decimal(38,18), 4:ROW__ID:struct] + Select Operator + expressions: key0 (type: timestamp), key1 (type: smallint), key2 (type: string), s_decimal (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + sort order: +++ + Map-reduce partition columns: _col0 (type: timestamp), _col1 (type: smallint), _col2 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:timestamp, 1:smallint, 2:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 3:decimal(38,18) + Statistics: Num rows: 118 Data size: 28216 Basic stats: COMPLETE Column stats: NONE + value expressions: _col3 (type: decimal(38,18)) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 4 + includeColumns: [0, 1, 2, 3] + dataColumns: key0:timestamp, key1:smallint, key2:string, s_decimal:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aaa + reduceColumnSortOrder: +++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + dataColumns: KEY.reducesinkkey0:timestamp, KEY.reducesinkkey1:smallint, KEY.reducesinkkey2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [timestamp, bigint, string, decimal(38,18)] + Reduce Operator Tree: + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: string) + 1 KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), KEY.reducesinkkey2 (type: string) + Map Join 
Vectorization: + bigTableKeyExpressions: col 0:timestamp, col 1:smallint, col 2:string + bigTableValueExpressions: col 0:timestamp, col 1:smallint, col 2:string + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + input vertices: + 1 Map 4 + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + DynamicPartitionHashJoin: true + Reduce Output Operator + key expressions: _col0 (type: timestamp), _col1 (type: smallint) + sort order: ++ + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:timestamp, 1:smallint + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:string, 3:timestamp, 4:smallint, 5:string, 6:decimal(38,18) + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: string), _col3 (type: timestamp), _col4 (type: smallint), _col5 (type: string), _col6 (type: decimal(38,18)) + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 7 + dataColumns: KEY.reducesinkkey0:timestamp, KEY.reducesinkkey1:smallint, VALUE._col0:string, VALUE._col1:timestamp, VALUE._col2:smallint, VALUE._col3:string, VALUE._col4:decimal(38,18) + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: smallint), VALUE._col0 (type: string), VALUE._col1 (type: timestamp), VALUE._col2 (type: smallint), VALUE._col3 (type: string), VALUE._col4 (type: decimal(38,18)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 129 Data size: 31037 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +PREHOOK: type: QUERY +PREHOOK: Input: 
default@fullouter_multikey_big_1b +PREHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +POSTHOOK: query: SELECT b.key0, b.key1, b.key2, s.key0, s.key1, s.key2, s.s_decimal FROM fullouter_multikey_big_1b b FULL OUTER JOIN fullouter_multikey_small_1b s ON b.key0 = s.key0 AND b.key1 = s.key1 AND b.key2 = s.key2 +order by b.key0, b.key1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@fullouter_multikey_big_1b +POSTHOOK: Input: default@fullouter_multikey_small_1b +#### A masked pattern was here #### +2061-12-19 22:10:32.000628309 21635 ANCO NULL NULL NULL NULL +2082-07-14 04:00:40.695380469 12556 NCYBDW NULL NULL NULL NULL +2093-04-10 23:36:54.846 1446 GHZVPWFO NULL NULL NULL NULL +2093-04-10 23:36:54.846 28996 Q NULL NULL NULL NULL +2093-04-10 23:36:54.846 NULL NULL NULL NULL NULL NULL +2188-06-04 15:03:14.963259704 9468 AAA 2188-06-04 15:03:14.963259704 9468 AAA 2.754963520000000000 +2299-11-15 16:41:30.401 -31077 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 -6909 NCYBDW NULL NULL NULL NULL +2306-06-21 11:02:00.143124239 1446 NULL NULL NULL NULL NULL +2608-02-23 23:44:02.546440891 26184 NCYBDW NULL NULL NULL NULL +2686-05-23 07:46:46.565832918 13212 NCYBDW 2686-05-23 07:46:46.565832918 13212 NCYBDW -917116793.400000000000000000 +2686-05-23 07:46:46.565832918 NULL GHZVPWFO NULL NULL NULL NULL +2898-10-01 22:27:02.000871113 10361 NCYBDW NULL NULL NULL NULL +NULL -6909 NULL NULL NULL NULL NULL +NULL 21635 ANCO NULL NULL NULL NULL +NULL NULL CCWYD NULL NULL NULL NULL +NULL NULL NULL 1905-04-20 13:42:24.000469776 2638 KAUUFF 7.000000000000000000 +NULL NULL NULL 1919-06-20 00:16:50.611028595 20223 ZKBC -23.000000000000000000 +NULL NULL NULL 1931-12-04 11:13:47.269597392 23196 HVJCQMTQL -9697532.899400000000000000 +NULL NULL NULL 1941-10-16 02:19:35.000423663 -24459 AO -821445414.457971200000000000 +NULL NULL NULL 1957-02-01 14:00:28.000548421 -16085 ZVEUKC -2312.814900000000000000 +NULL NULL NULL 1957-03-06 09:57:31 -26373 NXLNNSO 2.000000000000000000 +NULL NULL NULL 1980-09-13 19:57:15 NULL M 57650.772300000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 7506645.953700000000000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -12202 VBDBM 98790.713907420831000000 +NULL NULL NULL 2018-11-25 22:27:55.84 -22419 LOTLS 342.372604022858400000 +NULL NULL NULL 2038-10-12 09:15:33.000539653 -19598 YKNIAJW -642807895924.660000000000000000 +NULL NULL NULL 2044-05-02 07:00:03.35 -8751 ZSMB -453797242.029791752000000000 +NULL NULL NULL 2071-07-21 20:02:32.000250697 2638 NRUV -66198.351092000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 5.000000000000000000 +NULL NULL NULL 2073-03-21 15:32:57.617920888 26425 MPRACIRYW 726945733.419300000000000000 +NULL NULL NULL 2075-10-25 20:32:40.000792874 NULL NULL 226612651968.360760000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR -394.086700000000000000 +NULL NULL NULL 2083-06-07 09:35:19.383 -26373 MR 67892053.023760940000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV -85184687349898.892000000000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 0.439686100000000000 +NULL NULL NULL 2086-04-09 00:03:10 20223 THXNJGFFV 482.538341135921900000 +NULL NULL NULL 2105-01-04 16:27:45 23100 ZSMB -83.232800000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 NULL -9784.820000000000000000 +NULL NULL NULL 2145-10-15 06:58:42.831 2638 UANGISEXR -5996.306000000000000000 +NULL NULL NULL 2169-04-02 06:30:32 23855 PDVQATOS -1515597428.000000000000000000 +NULL NULL NULL 
2169-04-02 06:30:32 23855 PDVQATOS -4016.960800000000000000 +NULL NULL NULL 2201-07-05 17:22:06.084206844 -24459 UBGT 1.506948328200000000 +NULL NULL NULL 2238-05-17 19:27:25.519 20223 KQCM -0.010950000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ -0.261490000000000000 +NULL NULL NULL 2242-08-04 07:51:46.905 20223 UCYXACQ 37.728800000000000000 +NULL NULL NULL 2266-09-26 06:27:29.000284762 20223 EDYJJN 14.000000000000000000 +NULL NULL NULL 2301-06-03 17:16:19 15332 ZVEUKC 0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 -13125 JFYW 6.086657000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR -0.500000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 11101 YJCKKCR 1279917802.420000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 12587 OPW -4.594895040000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T -0.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 2720.800000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 1301 T 61.302000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G -4319470286240016.300000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 15090 G 975.000000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 30285 GSJPSIYOU 0.200000000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO -0.435500000000000000 +NULL NULL NULL 2304-12-15 15:31:16 8650 RLNO 0.713517473350000000 +NULL NULL NULL 2309-01-15 12:43:49 22821 ZMY 40.900000000000000000 +NULL NULL NULL 2332-06-14 07:02:42.32 -26373 XFFFDTQ 56845106806308.900000000000000000 +NULL NULL NULL 2333-07-28 09:59:26 23196 RKSK 37872288434740893.500000000000000000 +NULL NULL NULL 2338-02-12 09:30:07 20223 CTH -6154.763054000000000000 +NULL NULL NULL 2340-12-15 05:15:17.133588982 23663 HHTP 33383.800000000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 74179461.880493000000000000 +NULL NULL NULL 2355-09-23 19:52:34.638084141 -19598 H 92.150000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -32.460000000000000000 +NULL NULL NULL 2357-05-08 07:09:09.000482799 6226 ZSMB -472.000000000000000000 +NULL NULL NULL 2391-01-17 15:28:37.00045143 16160 ZVEUKC 771355639420297.133000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB -5151598.347000000000000000 +NULL NULL NULL 2396-04-06 15:39:02.404013577 29661 ZSMB 0.767183260000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -162.950000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR -9926693851.000000000000000000 +NULL NULL NULL 2409-09-23 10:33:27 2638 XSXR 0.400000000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR -769088.176482000000000000 +NULL NULL NULL 2410-05-03 13:44:56 2638 PHOR 93262.914526611000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -9575827.553960000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB -991.436050000000000000 +NULL NULL NULL 2461-03-09 09:54:45.000982385 -16454 ZSMB 8694.890000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB -582687.000000000000000000 +NULL NULL NULL 2462-12-16 23:11:32.633305644 -26373 CB 67.417990000000000000 +NULL NULL NULL 2467-05-11 06:04:13.426693647 23196 EIBSDASR -8.554888380100000000 +NULL NULL NULL 2480-10-02 09:31:37.000770961 -26373 NBN -5875.519725200000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ -49.512190000000000000 +NULL NULL NULL 2512-10-06 03:03:03 -3465 VZQ 0.445800000000000000 +NULL NULL NULL 2512-10-06 03:03:03 13195 CRJ 14.000000000000000000 +NULL NULL NULL 
2512-10-06 03:03:03 1560 X -922.695158410700000000 +NULL NULL NULL 2512-10-06 03:03:03 1560 X 761196.522000000000000000 +NULL NULL NULL 2512-10-06 03:03:03 24313 QBHUG -8423.151573236000000000 +NULL NULL NULL 2512-10-06 03:03:03 32099 ARNZ -0.410000000000000000 +NULL NULL NULL 2525-05-12 15:59:35 -24459 SAVRGA 53106747151.863300000000000000 +NULL NULL NULL 2535-03-01 05:04:49.000525883 23663 ALIQKNXHE -0.166569100000000000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 41.774515077866460000 +NULL NULL NULL 2629-04-07 01:54:11 -6776 WGGFVFTW 6.801285170800000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -12923 PPTJPFR 5.400000000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 -17786 HYEGQ -84.169614329419000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 7362887891522.378200000000000000 +NULL NULL NULL 2637-03-12 22:25:46.385 21841 CXTI 749563668434009.650000000000000000 +NULL NULL NULL 2668-06-25 07:12:37.000970744 2638 TJE -2.779682700000000000 +NULL NULL NULL 2688-02-06 20:58:42.000947837 20223 PAIY 67661.735000000000000000 +NULL NULL NULL 2743-12-27 05:16:19.000573579 -12914 ZVEUKC -811984611.517849700000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 368.000000000000000000 +NULL NULL NULL 2759-11-26 22:19:55.410967136 -27454 ZMY 60.602579700000000000 +NULL NULL NULL 2808-07-09 02:10:11.928498854 -19598 FHFX 0.300000000000000000 +NULL NULL NULL 2829-06-04 08:01:47.836 22771 ZVEUKC 94317.753180000000000000 +NULL NULL NULL 2861-05-27 07:13:01.000848622 -19598 WKPXNLXS 29399.000000000000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -4244.926206619000000000 +NULL NULL NULL 2882-05-20 07:21:25.221299462 23196 U -9951044.000000000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC -56082455.033918000000000000 +NULL NULL NULL 2888-05-08 08:36:55.182302102 5786 ZVEUKC 57.621752577880370000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 51.732330327300000000 +NULL NULL NULL 2897-08-10 15:21:47.09 23663 XYUVBED 6370.000000000000000000 +NULL NULL NULL 2898-12-18 03:37:17 -24459 MHNBXPBM 14.236693562384810000 +NULL NULL NULL 2913-07-17 15:06:58.041 -10206 NULL -0.200000000000000000 +NULL NULL NULL 2938-12-21 23:35:59.498 29362 ZMY 0.880000000000000000 +NULL NULL NULL 2957-05-07 10:41:46 20223 OWQT -586953.153681000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF -96.300000000000000000 +NULL NULL NULL 2960-04-12 07:03:42.000366651 20340 CYZYUNSF 2.157765900000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -18138 VDPN 8924831210.427680190000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -32485 AGEPWWLJF -48431309405.652522000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -375994644577.315257000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ -81.000000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 -8913 UIMQ 9.178000000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 14500 WXLTRFQP -23.819800000000000000 +NULL NULL NULL 2969-01-23 14:08:04.000667259 6689 TFGVOGPJF -0.010000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -27394351.300000000000000000 +NULL NULL NULL 2971-02-14 09:13:19 -16605 BVACIRP -5.751278023000000000 +NULL NULL NULL NULL -12914 ZVEUKC 221.000000000000000000 +NULL NULL NULL NULL NULL NULL -2.400000000000000000 +NULL NULL NULL NULL NULL NULL -2207.300000000000000000 +NULL NULL NULL NULL NULL NULL NULL diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out 
ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out index 9a2f5d8..c7085bd 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id1.q.out @@ -84,10 +84,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 24 Data size: 8832 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -243,10 +242,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 24 Data size: 8832 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -402,10 +400,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 18 Data size: 6624 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -555,10 +552,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 18 Data size: 6624 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -708,10 +704,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for 
values IS true - valueColumnNums: [] Statistics: Num rows: 24 Data size: 8832 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -868,10 +863,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 24 Data size: 8832 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id2.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id2.q.out index 6005fb2..8f4874c 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id2.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id2.q.out @@ -87,11 +87,11 @@ STAGE PLANS: Map-reduce partition columns: rand() (type: double) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [4] - valueColumnNums: [3] + partitionColumns: 4:double + valueColumns: 3:bigint Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Execution mode: vectorized, llap @@ -147,11 +147,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 1] - valueColumnNums: [3] + partitionColumns: 0:int, 1:int + valueColumns: 3:bigint Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reducer 3 @@ -289,11 +289,11 @@ STAGE PLANS: Map-reduce partition columns: rand() (type: double) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [4] - valueColumnNums: [3] + partitionColumns: 4:double + valueColumns: 3:bigint Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Execution mode: vectorized, llap @@ -349,11 +349,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) 
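
[Editor's note] The golden-file churn in these hunks is mechanical: every `keyColumnNums: [...]`, `partitionColumnNums: [...]`, and `valueColumnNums: [...]` array in the EXPLAIN VECTORIZATION output is rewritten as `keyColumns`, `partitionColumns`, and `valueColumns`, with each column ordinal annotated by its vector type. A minimal Java sketch of that `ordinal:type` rendering follows; the class and method names are illustrative only, not the actual Hive Vectorizer code.

    import java.util.StringJoiner;

    public class ColumnListFormat {
        // Render vector column ordinals with their types, e.g. "0:int, 1:int, 2:bigint",
        // instead of the old bare-index form "[0, 1, 2]".
        static String format(int[] columnNums, String[] typeNames) {
            StringJoiner joiner = new StringJoiner(", ");
            for (int i = 0; i < columnNums.length; i++) {
                joiner.add(columnNums[i] + ":" + typeNames[i]);
            }
            return joiner.toString();
        }

        public static void main(String[] args) {
            // Matches the keyColumns line of the VectorReduceSinkObjectHashOperator entries above.
            System.out.println("keyColumns: "
                + format(new int[] {0, 1, 2}, new String[] {"int", "int", "bigint"}));
        }
    }

Carrying the type beside the ordinal makes the plan self-describing: a reader no longer has to cross-reference bare column numbers against the operator's schema.
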
Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 1] - valueColumnNums: [3] + partitionColumns: 0:int, 1:int + valueColumns: 3:bigint Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reducer 3 @@ -501,11 +501,10 @@ STAGE PLANS: Map-reduce partition columns: rand() (type: double) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [3] - valueColumnNums: [] + partitionColumns: 3:double Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -558,11 +557,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 1] - valueColumnNums: [] + partitionColumns: 0:int, 1:int Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap @@ -620,11 +618,11 @@ STAGE PLANS: Map-reduce partition columns: rand() (type: double) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [1] + partitionColumns: 2:double + valueColumns: 1:bigint Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reducer 4 @@ -663,10 +661,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:bigint Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reducer 5 @@ -808,11 +806,10 @@ STAGE PLANS: Map-reduce partition columns: rand() (type: double) Reduce Sink Vectorization: className: 
VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [3] - valueColumnNums: [] + partitionColumns: 3:double Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -865,11 +862,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 1] - valueColumnNums: [] + partitionColumns: 0:int, 1:int Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap @@ -927,11 +923,11 @@ STAGE PLANS: Map-reduce partition columns: rand() (type: double) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [1] + partitionColumns: 2:double + valueColumns: 1:bigint Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reducer 4 @@ -970,10 +966,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:bigint Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reducer 5 @@ -1111,11 +1107,10 @@ STAGE PLANS: Map-reduce partition columns: rand() (type: double) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [3] - valueColumnNums: [] + partitionColumns: 3:double Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1168,11 +1163,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + 
keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 1] - valueColumnNums: [] + partitionColumns: 0:int, 1:int Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint) @@ -1180,11 +1174,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 1] - valueColumnNums: [] + partitionColumns: 0:int, 1:int Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap @@ -1228,10 +1221,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [2] + keyColumns: 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: NONE Reducer 4 Execution mode: llap @@ -1251,6 +1243,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 5 Execution mode: vectorized, llap Reduce Vectorization: @@ -1293,10 +1288,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [2] + keyColumns: 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 @@ -1439,11 +1433,10 @@ STAGE PLANS: Map-reduce partition columns: rand() (type: double) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [3] - valueColumnNums: [] + partitionColumns: 3:double Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP 
IO: all inputs @@ -1496,11 +1489,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 1] - valueColumnNums: [] + partitionColumns: 0:int, 1:int Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint) @@ -1508,11 +1500,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 1] - valueColumnNums: [] + partitionColumns: 0:int, 1:int Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap @@ -1556,10 +1547,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [2] + keyColumns: 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: NONE Reducer 4 Execution mode: llap @@ -1579,6 +1569,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 5 Execution mode: vectorized, llap Reduce Vectorization: @@ -1621,10 +1614,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [2] + keyColumns: 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 @@ -1760,10 +1752,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:bigint Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Execution mode: vectorized, llap @@ -1926,10 +1918,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -2004,10 +1995,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:bigint Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Reducer 3 @@ -2144,10 +2135,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint) @@ -2155,10 +2145,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -2219,10 +2208,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [2] + keyColumns: 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE 
Column stats: NONE Reducer 3 Execution mode: llap @@ -2242,6 +2230,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: @@ -2284,10 +2275,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [2] + keyColumns: 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 9 Data size: 72 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out index d8e6b3f..0d5dd91 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_id3.q.out @@ -101,11 +101,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int), 1L (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 4] + keyColumns: 0:int, 1:int, 4:bigint keyExpressions: ConstantVectorExpression(val 1) -> 4:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:bigint Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Execution mode: vectorized, llap @@ -270,10 +270,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:bigint Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out index 3586eae..198c826 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets1.q.out @@ -102,10 +102,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:bigint Statistics: Num rows: 24 Data size: 8832 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Execution mode: vectorized, llap @@ -264,10 +264,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:bigint Statistics: Num rows: 24 Data size: 8832 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Execution mode: vectorized, llap @@ -426,10 +426,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:bigint Statistics: Num rows: 24 Data size: 8832 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Execution mode: vectorized, llap @@ -588,10 +588,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:bigint Statistics: Num rows: 12 Data size: 4416 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Execution mode: vectorized, llap @@ -744,10 +744,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2, 3] + keyColumns: 0:string, 1:string, 2:string, 3:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 18 Data size: 9936 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -900,10 +899,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: 
[0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1041,10 +1039,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: double) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:bigint Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out index b072ffc..3b1d8af 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets2.q.out @@ -88,10 +88,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2] + valueColumns: 2:bigint Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) Execution mode: vectorized, llap @@ -147,10 +147,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:bigint Statistics: Num rows: 24 Data size: 8832 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reducer 3 @@ -269,10 +269,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2] + valueColumns: 2:bigint Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: 
bigint) Execution mode: vectorized, llap @@ -328,10 +328,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:bigint Statistics: Num rows: 24 Data size: 8832 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reducer 3 @@ -503,10 +503,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:double Statistics: Num rows: 24 Data size: 13248 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: double) Reducer 3 @@ -673,10 +673,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2] + valueColumns: 2:bigint Statistics: Num rows: 3 Data size: 534 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Execution mode: vectorized, llap @@ -732,10 +732,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:bigint Statistics: Num rows: 12 Data size: 2232 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint) Reducer 3 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3.q.out index 80ecd59..a0303e0 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3.q.out @@ -366,10 +366,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true 
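
[Editor's note] Two further display changes recur throughout these plans: an empty value-column list is now omitted outright (hence the deleted `valueColumnNums: []` lines with no replacement), and reducers whose MergeJoin cannot be vectorized gain an explicit `MergeJoin Vectorization: enabled: false` entry naming the unmet condition. A hedged sketch of the omit-when-empty rule, again with hypothetical method names rather than the real plan-printing code:

    import java.util.List;

    public class EmptyListSuppression {
        // Append a "valueColumns:" line only when value columns exist; an empty
        // list produces no output, matching the deleted "valueColumnNums: []" lines.
        static void appendValueColumns(StringBuilder out, List<String> valueColumns) {
            if (valueColumns.isEmpty()) {
                return;
            }
            out.append("valueColumns: ").append(String.join(", ", valueColumns)).append('\n');
        }

        public static void main(String[] args) {
            StringBuilder plan = new StringBuilder();
            appendValueColumns(plan, List.of());           // emits nothing
            appendValueColumns(plan, List.of("3:bigint")); // emits one line
            System.out.print(plan);
        }
    }
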
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3, 4] + valueColumns: 3:struct, 4:bigint Statistics: Num rows: 48 Data size: 26496 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: struct), _col4 (type: bigint) Reducer 3 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3_dec.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3_dec.q.out index dabc987..aa19a38 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3_dec.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets3_dec.q.out @@ -95,10 +95,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3, 4] + valueColumns: 3:struct, 4:bigint Statistics: Num rows: 48 Data size: 23040 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: struct), _col4 (type: bigint) Execution mode: vectorized, llap @@ -233,10 +233,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3, 4] + valueColumns: 3:struct, 4:bigint Statistics: Num rows: 48 Data size: 23040 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: struct), _col4 (type: bigint) Execution mode: vectorized, llap @@ -397,10 +397,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2, 3] + valueColumns: 2:struct, 3:bigint Statistics: Num rows: 12 Data size: 5760 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: struct), _col3 (type: bigint) Execution mode: vectorized, llap @@ -456,10 +456,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN 
IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3, 4] + valueColumns: 3:struct, 4:bigint Statistics: Num rows: 48 Data size: 23040 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: struct), _col4 (type: bigint) Reducer 3 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets4.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets4.q.out index b896193..47348f1 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets4.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets4.q.out @@ -96,10 +96,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:bigint Statistics: Num rows: 8 Data size: 2944 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reduce Output Operator @@ -108,10 +108,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:bigint Statistics: Num rows: 8 Data size: 2944 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Execution mode: vectorized, llap @@ -183,10 +183,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2] + valueColumns: 1:string, 2:bigint Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: bigint) Reducer 3 @@ -207,6 +207,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: @@ -259,10 +262,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, 
spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2] + valueColumns: 1:string, 2:bigint Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: bigint) @@ -340,10 +343,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:bigint Statistics: Num rows: 8 Data size: 2944 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reduce Output Operator @@ -352,10 +355,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:bigint Statistics: Num rows: 8 Data size: 2944 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Execution mode: vectorized, llap @@ -427,10 +430,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2] + valueColumns: 1:string, 2:bigint Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: bigint) Reducer 3 @@ -451,6 +454,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: @@ -503,10 +509,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2] + valueColumns: 1:string, 2:bigint Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: 
bigint) @@ -615,10 +621,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2] + valueColumns: 2:bigint Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) Execution mode: vectorized, llap @@ -674,10 +680,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:bigint Statistics: Num rows: 8 Data size: 2944 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reduce Output Operator @@ -686,10 +692,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:bigint Statistics: Num rows: 8 Data size: 2944 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reducer 3 @@ -744,10 +750,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2] + valueColumns: 1:string, 2:bigint Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: bigint) Reducer 4 @@ -768,6 +774,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 5 Execution mode: vectorized, llap Reduce Vectorization: @@ -820,10 +829,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez 
IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2] + valueColumns: 1:string, 2:bigint Statistics: Num rows: 4 Data size: 1472 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out index 8da5735..7eca699 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets5.q.out @@ -88,10 +88,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -158,10 +157,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:bigint Statistics: Num rows: 12 Data size: 4416 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reducer 3 @@ -280,10 +279,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -350,10 +348,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:bigint Statistics: Num rows: 12 Data size: 4416 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reducer 3 @@ -499,10 +497,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: 
string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -569,10 +566,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2] + valueColumns: 2:bigint Statistics: Num rows: 3 Data size: 1104 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: bigint) Reducer 3 @@ -611,10 +608,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:bigint Statistics: Num rows: 12 Data size: 4416 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reducer 4 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets6.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets6.q.out index 6c4ae65..7323752 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets6.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets6.q.out @@ -88,10 +88,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -231,10 +230,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: 
[] Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out index 7f7624a..c10b455 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_grouping.q.out @@ -88,10 +88,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -249,10 +248,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 24 Data size: 192 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -417,10 +415,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 24 Data size: 192 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -586,10 +583,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 24 Data size: 192 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -657,10 +653,10 @@ STAGE PLANS: sort order: -+ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [5, 4] + keyColumns: 5:bigint, 4:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS 
true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1] + valueColumns: 0:int, 1:int Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int) Reducer 3 @@ -792,10 +788,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -953,10 +948,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 24 Data size: 192 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1128,10 +1122,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1290,10 +1283,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 24 Data size: 192 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1354,11 +1346,11 @@ STAGE PLANS: sort order: -+ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [5, 4] + keyColumns: 5:bigint, 4:int keyExpressions: IfExprColumnNull(col 3:boolean, col 0:int, null)(children: LongColEqualLongScalar(col 5:bigint, val 1) -> 3:boolean, col 0:int) -> 4:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No 
DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1] + valueColumns: 0:int, 1:int Statistics: Num rows: 12 Data size: 96 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: int) Reducer 3 @@ -1490,10 +1482,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 24 Data size: 192 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1656,10 +1647,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 24 Data size: 192 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1822,10 +1812,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1983,10 +1972,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: int), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:int, 1:int, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 18 Data size: 144 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out index e67bca7..afa13ab 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_sets_limit.q.out @@ -88,10 +88,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink 
Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:bigint Statistics: Num rows: 24 Data size: 8832 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col3 (type: bigint) @@ -156,10 +156,10 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2] + valueColumns: 2:bigint Statistics: Num rows: 12 Data size: 4416 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: bigint) @@ -289,10 +289,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:bigint Statistics: Num rows: 24 Data size: 8832 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col3 (type: bigint) @@ -357,10 +357,10 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2] + valueColumns: 2:bigint Statistics: Num rows: 12 Data size: 4416 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: bigint) @@ -490,10 +490,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:bigint Statistics: Num rows: 12 Data size: 4416 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col3 (type: bigint) @@ -558,10 +558,10 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true 
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2] + valueColumns: 2:bigint Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: bigint) @@ -689,10 +689,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2, 3] + keyColumns: 0:string, 1:string, 2:string, 3:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 18 Data size: 9936 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap @@ -754,10 +753,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 9 Data size: 4968 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Reducer 3 @@ -884,10 +882,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 6 Data size: 1104 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap @@ -940,10 +937,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 3 Data size: 552 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 Reducer 3 @@ -1068,10 +1064,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: double) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:bigint Statistics: Num rows: 6 Data size: 2208 Basic stats: COMPLETE Column stats: NONE 
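The golden-file churn above and below is mechanical: the EXPLAIN VECTORIZATION output switches from bare column ordinals (keyColumnNums: [0, 1, 2], valueColumnNums: [3]) to ordinal:type pairs (keyColumns: 0:string, 1:string, 2:bigint, valueColumns: 3:bigint), and empty valueColumnNums: [] lines are dropped rather than renamed. A minimal sketch of that formatting change follows; the class and method names are illustrative assumptions, not the patch's actual code.

import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

public class ColumnAndTypeFormat {
  // Old style rendered "[0, 1, 2]"; new style renders "0:int, 1:int, 2:bigint".
  static String columnsAndTypes(int[] columnNums, List<String> typeNames) {
    return IntStream.of(columnNums)
        .mapToObj(c -> c + ":" + typeNames.get(c))
        .collect(Collectors.joining(", "));
  }

  public static void main(String[] args) {
    List<String> schema = List.of("int", "int", "bigint");
    System.out.println(columnsAndTypes(new int[] {0, 1, 2}, schema));
    // prints: 0:int, 1:int, 2:bigint
  }
}

Printing the type next to each ordinal makes a reduce-sink's key/value layout readable without cross-referencing the vertex schema, which is presumably why the rename touches so many .q.out files at once.
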
TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) @@ -1127,10 +1123,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:bigint Statistics: Num rows: 3 Data size: 1104 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) diff --git ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out index dc3363d..8c5c1da 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_grouping_window.q.out @@ -86,10 +86,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int), _col1 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1] + keyColumns: 0:int, 1:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2, 3] + valueColumns: 2:int, 3:int Statistics: Num rows: 3 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: int), _col3 (type: int) Execution mode: vectorized, llap @@ -153,11 +153,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 2] + keyColumns: 0:int, 2:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [1] + partitionColumns: 0:int + valueColumns: 1:int Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: int) Reducer 3 diff --git ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out index 98e6e54..21cc5dc 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_mapjoin.q.out @@ -168,6 +168,9 @@ STAGE PLANS: sort order: + Statistics: Num rows: 500 Data size: 89000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: diff --git ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out index e839214..3f8bc33 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby_rollup1.q.out @@ -84,10 +84,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: 
string), _col1 (type: string), _col2 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3] + valueColumns: 3:bigint Statistics: Num rows: 18 Data size: 6624 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Execution mode: vectorized, llap @@ -368,11 +368,11 @@ STAGE PLANS: Map-reduce partition columns: rand() (type: double) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [4] - valueColumnNums: [3] + partitionColumns: 4:double + valueColumns: 3:bigint Statistics: Num rows: 18 Data size: 6624 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Execution mode: vectorized, llap @@ -428,11 +428,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 1] - valueColumnNums: [3] + partitionColumns: 0:string, 1:string + valueColumns: 3:bigint Statistics: Num rows: 18 Data size: 6624 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reducer 3 @@ -743,11 +743,11 @@ STAGE PLANS: Map-reduce partition columns: rand() (type: double) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [4] - valueColumnNums: [3] + partitionColumns: 4:double + valueColumns: 3:bigint Statistics: Num rows: 18 Data size: 6624 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Select Operator @@ -778,11 +778,11 @@ STAGE PLANS: Map-reduce partition columns: rand() (type: double) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [4] - valueColumnNums: [3] + partitionColumns: 4:double + valueColumns: 3:bigint Statistics: Num rows: 18 Data 
size: 6624 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Execution mode: vectorized, llap @@ -838,11 +838,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 1] - valueColumnNums: [3] + partitionColumns: 0:string, 1:string + valueColumns: 3:bigint Statistics: Num rows: 18 Data size: 6624 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reducer 3 @@ -931,11 +931,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 1] - valueColumnNums: [3] + partitionColumns: 0:string, 1:string + valueColumns: 3:bigint Statistics: Num rows: 18 Data size: 6624 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint) Reducer 5 diff --git ql/src/test/results/clientpositive/llap/vector_include_no_sel.q.out ql/src/test/results/clientpositive/llap/vector_include_no_sel.q.out index 4a3bc02..294faf7 100644 --- ql/src/test/results/clientpositive/llap/vector_include_no_sel.q.out +++ ql/src/test/results/clientpositive/llap/vector_include_no_sel.q.out @@ -269,6 +269,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: diff --git ql/src/test/results/clientpositive/llap/vector_inner_join.q.out ql/src/test/results/clientpositive/llap/vector_inner_join.q.out index bb555df..59ed1ce 100644 --- ql/src/test/results/clientpositive/llap/vector_inner_join.q.out +++ ql/src/test/results/clientpositive/llap/vector_inner_join.q.out @@ -84,12 +84,14 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [] className: VectorMapJoinInnerBigOnlyLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0] + nonOuterSmallTableKeyMapping: [0] + projectedOutput: 0:int + hashTableImplementationType: OPTIMIZED outputColumnNames: _col1 input vertices: 1 Map 2 @@ -158,10 +160,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - 
keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -250,13 +251,15 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0] - bigTableValueColumnNums: [0] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0 input vertices: 1 Map 2 @@ -329,10 +332,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -457,13 +459,15 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col1 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [] className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [3, 0] - smallTableMapping: [3] + nonOuterSmallTableKeyMapping: [0] + projectedOutput: 3:string, 0:int + smallTableValueMapping: 3:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col1, _col2 input vertices: 1 Map 2 @@ -532,10 +536,10 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [1] + keyColumns: 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:string Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Execution mode: vectorized, llap @@ -624,10 +628,10 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Reduce Sink Vectorization: className: 
VectorReduceSinkLongOperator - keyColumnNums: [1] + keyColumns: 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:string Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Execution mode: vectorized, llap @@ -677,14 +681,16 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:string className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [3, 0, 0, 1] - smallTableMapping: [3] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 3:string, 0:int, 0:int, 1:string + smallTableValueMapping: 3:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 0 Map 1 @@ -786,14 +792,16 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col1 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:string className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1, 3, 0] - smallTableMapping: [3] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int, 1:string, 3:string, 0:int + smallTableValueMapping: 3:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 2 @@ -863,10 +871,10 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [1] + keyColumns: 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:string Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Execution mode: vectorized, llap @@ -956,14 +964,16 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col1 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:string className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1, 3] - smallTableMapping: [3] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int, 1:string, 3:string + smallTableValueMapping: 3:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2 input vertices: 1 Map 2 @@ -1032,10 +1042,10 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [1] + keyColumns: 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:string Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Execution mode: vectorized, llap @@ -1125,14 +1135,16 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col1 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [1] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [1] + bigTableValueColumns: 1:string className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [1, 3, 0] - smallTableMapping: [3] + nonOuterSmallTableKeyMapping: [0] + projectedOutput: 1:string, 3:string, 0:int + smallTableValueMapping: 3:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col1, _col2, _col3 input vertices: 1 Map 2 @@ -1201,10 +1213,10 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [1] + keyColumns: 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:string Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Execution mode: vectorized, llap @@ -1293,10 +1305,10 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [1] + keyColumns: 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:string Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Execution mode: vectorized, llap @@ -1346,14 +1358,16 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: - 
bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:string className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [3, 0, 1] - smallTableMapping: [3] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 3:string, 0:int, 1:string + smallTableValueMapping: 3:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col2, _col3 input vertices: 0 Map 1 @@ -1462,10 +1476,10 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [1] + keyColumns: 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:string Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Execution mode: vectorized, llap @@ -1515,14 +1529,16 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [1] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [1] + bigTableValueColumns: 1:string className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [3, 0, 1] - smallTableMapping: [3] + nonOuterSmallTableKeyMapping: [0] + projectedOutput: 3:string, 0:int, 1:string + smallTableValueMapping: 3:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col3 input vertices: 0 Map 1 diff --git ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out index cfe3d5f..d48d810 100644 --- ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out @@ -229,6 +229,7 @@ STAGE PLANS: className: VectorMapJoinInnerBigOnlyMultiKeyOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2 input vertices: 1 Map 2 diff --git ql/src/test/results/clientpositive/llap/vector_join30.q.out ql/src/test/results/clientpositive/llap/vector_join30.q.out index 5fb8258..07e54b9 100644 --- ql/src/test/results/clientpositive/llap/vector_join30.q.out +++ 
ql/src/test/results/clientpositive/llap/vector_join30.q.out @@ -10,7 +10,7 @@ POSTHOOK: Output: database:default POSTHOOK: Output: default@orcsrc POSTHOOK: Lineage: orcsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: orcsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x JOIN @@ -18,7 +18,7 @@ JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x JOIN @@ -50,6 +50,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -68,11 +69,14 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: true Statistics: Num rows: 475 Data size: 83204 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -85,6 +89,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 2 Map Operator Tree: TableScan @@ -92,6 +102,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -110,13 +121,24 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 + keyContext: [types [string], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [0:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[_col0]] + 1 [Column[_col0]] keys: 0 _col0 (type: string) 1 _col0 (type: string) Map Join Vectorization: + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:string, 1:string className: VectorMapJoinInnerBigOnlyStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:string, 1:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col2, _col3 input vertices: 0 Map 1 @@ -135,10 +157,13 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: 
COMPLETE Column stats: NONE Reduce Output Operator sort order: + output value column names: VALUE._col0 Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint + skipTag: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap @@ -152,14 +177,27 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: true vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -190,26 +228,7 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: FROM -(SELECT orcsrc.* FROM orcsrc sort by key) x -JOIN -(SELECT orcsrc.* FROM orcsrc sort by value) Y -ON (x.key = Y.key) -select sum(hash(Y.key,Y.value)) -PREHOOK: type: QUERY -PREHOOK: Input: default@orcsrc -#### A masked pattern was here #### -POSTHOOK: query: FROM -(SELECT orcsrc.* FROM orcsrc sort by key) x -JOIN -(SELECT orcsrc.* FROM orcsrc sort by value) Y -ON (x.key = Y.key) -select sum(hash(Y.key,Y.value)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@orcsrc -#### A masked pattern was here #### -103231310608 -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x LEFT OUTER JOIN @@ -217,7 +236,7 @@ LEFT OUTER JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x LEFT OUTER JOIN @@ -249,6 +268,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string) outputColumnNames: _col0 @@ -260,13 +280,25 @@ STAGE PLANS: Map Join Operator condition map: Left Outer Join 0 to 1 + keyContext: [types [string], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + outer filter mappings: [[1, 0], null] + valueContexts: [1:[types [string], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[_col0]] + 1 [Column[_col0]] keys: 0 _col0 (type: string) 1 _col0 (type: string) Map Join Vectorization: + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [] className: VectorMapJoinOuterStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS 
true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outerSmallTableKeyMapping: 0 -> 3 + projectedOutput: 3:string, 4:string + smallTableValueMapping: 4:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col2, _col3 input vertices: 1 Map 3 @@ -285,10 +317,13 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + output value column names: VALUE._col0 Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint + skipTag: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap @@ -302,6 +337,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: true vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [string, string, bigint] Map 3 Map Operator Tree: TableScan @@ -309,6 +350,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 @@ -320,11 +362,16 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + output key column names: KEY.reducesinkkey0 + output value column names: VALUE._col0 Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + skipTag: true Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized, llap @@ -338,14 +385,27 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -376,26 +436,7 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: FROM -(SELECT orcsrc.* FROM orcsrc sort by key) x -LEFT OUTER JOIN -(SELECT orcsrc.* FROM orcsrc sort by value) Y -ON (x.key = Y.key) -select sum(hash(Y.key,Y.value)) 
-PREHOOK: type: QUERY -PREHOOK: Input: default@orcsrc -#### A masked pattern was here #### -POSTHOOK: query: FROM -(SELECT orcsrc.* FROM orcsrc sort by key) x -LEFT OUTER JOIN -(SELECT orcsrc.* FROM orcsrc sort by value) Y -ON (x.key = Y.key) -select sum(hash(Y.key,Y.value)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@orcsrc -#### A masked pattern was here #### -103231310608 -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x RIGHT OUTER JOIN @@ -403,7 +444,7 @@ RIGHT OUTER JOIN ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x RIGHT OUTER JOIN @@ -435,6 +476,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string) outputColumnNames: _col0 @@ -446,11 +488,14 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: true Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -463,6 +508,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 2 Map Operator Tree: TableScan @@ -470,6 +521,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 @@ -481,13 +533,24 @@ STAGE PLANS: Map Join Operator condition map: Right Outer Join 0 to 1 + keyContext: [types [string], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + outer filter mappings: [null, [0, 0]] + valueContexts: [0:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[_col0]] + 1 [Column[_col0]] keys: 0 _col0 (type: string) 1 _col0 (type: string) Map Join Vectorization: + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:string, 1:string className: VectorMapJoinOuterStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:string, 1:string + hashTableImplementationType: OPTIMIZED 
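Beyond the column rename, switching these tests to "explain vectorization detail debug" surfaces additional MapJoin internals: bigTableRetainColumnNums, nonOuterSmallTableKeyMapping / outerSmallTableKeyMapping, smallTableValueMapping, projectedOutput, and a hashTableImplementationType line (OPTIMIZED in the plans here). A hedged sketch of how such a field might be modeled and rendered; the names below are assumptions for illustration, not the patch's actual classes.

public class MapJoinVectorizationDesc {
  // Assumed enum; the real code may carry different variants or names.
  enum HashTableImplementationType { NONE, OPTIMIZED, FAST }

  private final HashTableImplementationType hashTableImplementationType;

  MapJoinVectorizationDesc(HashTableImplementationType type) {
    this.hashTableImplementationType = type;
  }

  // Mirrors the "hashTableImplementationType: OPTIMIZED" explain line above.
  String explainLine() {
    return "hashTableImplementationType: " + hashTableImplementationType;
  }

  public static void main(String[] args) {
    System.out.println(new MapJoinVectorizationDesc(
        HashTableImplementationType.OPTIMIZED).explainLine());
  }
}
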
outputColumnNames: _col2, _col3 input vertices: 0 Map 1 @@ -506,10 +569,13 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + output value column names: VALUE._col0 Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint + skipTag: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap @@ -523,14 +589,27 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: true vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -561,26 +640,207 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: FROM +PREHOOK: query: explain vectorization detail debug +FROM (SELECT orcsrc.* FROM orcsrc sort by key) x -RIGHT OUTER JOIN +FULL OUTER JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Y ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -PREHOOK: Input: default@orcsrc -#### A masked pattern was here #### -POSTHOOK: query: FROM +POSTHOOK: query: explain vectorization detail debug +FROM (SELECT orcsrc.* FROM orcsrc sort by key) x -RIGHT OUTER JOIN +FULL OUTER JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Y ON (x.key = Y.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY -POSTHOOK: Input: default@orcsrc +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### -103231310608 -PREHOOK: query: explain vectorization expression + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + 
keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + output key column names: KEY.reducesinkkey0 + output value column names: VALUE._col0 + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + skipTag: false + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + outer filter mappings: [[1, 0], [0, 0]] + keyExpressions: + 0 [Column[_col0]] + 1 [Column[_col0]] + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 550 Data size: 96342 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + output value column names: VALUE._col0 + skipTag: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: 
bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x JOIN @@ -591,7 +851,7 @@ JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x JOIN @@ -626,6 +886,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -644,11 +905,14 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: true Statistics: Num rows: 475 Data size: 83204 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -661,6 +925,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 2 Map Operator Tree: TableScan @@ -668,6 +938,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -687,6 +958,12 @@ STAGE PLANS: condition map: Inner Join 0 to 1 Inner Join 0 to 
2 + keyContext: [types [string], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [0:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false], 2:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[_col0]] + 1 [Column[_col0]] + 2 [Column[_col0]] keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -717,10 +994,13 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + output value column names: VALUE._col0 Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint + skipTag: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized, llap @@ -734,6 +1014,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: true vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 4 Map Operator Tree: TableScan @@ -741,6 +1027,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Filter Operator Filter Vectorization: className: VectorFilterOperator @@ -759,11 +1046,14 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: true Statistics: Num rows: 475 Data size: 83204 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -776,14 +1066,27 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -814,32 +1117,7 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: FROM -(SELECT orcsrc.* FROM orcsrc sort by key) x -JOIN -(SELECT orcsrc.* FROM orcsrc sort by value) Y -ON (x.key = Y.key) -JOIN -(SELECT orcsrc.* FROM orcsrc sort by value) Z -ON (x.key = Z.key) -select 
sum(hash(Y.key,Y.value)) -PREHOOK: type: QUERY -PREHOOK: Input: default@orcsrc -#### A masked pattern was here #### -POSTHOOK: query: FROM -(SELECT orcsrc.* FROM orcsrc sort by key) x -JOIN -(SELECT orcsrc.* FROM orcsrc sort by value) Y -ON (x.key = Y.key) -JOIN -(SELECT orcsrc.* FROM orcsrc sort by value) Z -ON (x.key = Z.key) -select sum(hash(Y.key,Y.value)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@orcsrc -#### A masked pattern was here #### -348019368476 -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x JOIN @@ -850,7 +1128,7 @@ LEFT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x JOIN @@ -885,6 +1163,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string) outputColumnNames: _col0 @@ -896,11 +1175,14 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -913,6 +1195,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 4 Map Operator Tree: TableScan @@ -920,6 +1208,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 @@ -931,11 +1220,16 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + output key column names: KEY.reducesinkkey0 + output value column names: VALUE._col0 Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + skipTag: false Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized, llap @@ -949,6 +1243,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string 
+ partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 5 Map Operator Tree: TableScan @@ -956,6 +1256,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string) outputColumnNames: _col0 @@ -967,11 +1268,14 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -984,6 +1288,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -991,6 +1301,10 @@ STAGE PLANS: condition map: Inner Join 0 to 1 Left Outer Join 0 to 2 + keyExpressions: + 0 [Column[_col0]] + 1 [Column[_col0]] + 2 [Column[_col0]] keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -1004,16 +1318,28 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + output value column names: VALUE._col0 + skipTag: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -1044,32 +1370,7 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: FROM -(SELECT orcsrc.* FROM orcsrc sort by key) x -JOIN -(SELECT orcsrc.* FROM orcsrc sort by value) Y -ON (x.key = Y.key) -LEFT OUTER JOIN -(SELECT orcsrc.* FROM orcsrc sort by value) Z -ON (x.key = Z.key) -select sum(hash(Y.key,Y.value)) -PREHOOK: type: QUERY -PREHOOK: Input: default@orcsrc -#### A masked pattern was here #### -POSTHOOK: query: FROM -(SELECT orcsrc.* FROM orcsrc sort by key) x -JOIN -(SELECT orcsrc.* FROM orcsrc sort by value) Y -ON (x.key = Y.key) -LEFT OUTER JOIN -(SELECT orcsrc.* FROM orcsrc sort by value) Z -ON (x.key = Z.key) -select sum(hash(Y.key,Y.value)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@orcsrc -#### A masked pattern was here #### -348019368476 -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x LEFT OUTER JOIN @@ -1080,7 +1381,7 
@@ LEFT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x LEFT OUTER JOIN @@ -1115,6 +1416,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string) outputColumnNames: _col0 @@ -1126,11 +1428,14 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1143,6 +1448,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 4 Map Operator Tree: TableScan @@ -1150,6 +1461,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 @@ -1161,11 +1473,16 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + output key column names: KEY.reducesinkkey0 + output value column names: VALUE._col0 Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + skipTag: false Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized, llap @@ -1179,6 +1496,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 5 Map Operator Tree: TableScan @@ -1186,6 +1509,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string) outputColumnNames: _col0 @@ -1197,11 +1521,14 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: string) Reduce Sink 
Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1214,6 +1541,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1221,6 +1554,10 @@ STAGE PLANS: condition map: Left Outer Join 0 to 1 Left Outer Join 0 to 2 + keyExpressions: + 0 [Column[_col0]] + 1 [Column[_col0]] + 2 [Column[_col0]] keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -1234,16 +1571,28 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + output value column names: VALUE._col0 + skipTag: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -1274,32 +1623,7 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: FROM -(SELECT orcsrc.* FROM orcsrc sort by key) x -LEFT OUTER JOIN -(SELECT orcsrc.* FROM orcsrc sort by value) Y -ON (x.key = Y.key) -LEFT OUTER JOIN -(SELECT orcsrc.* FROM orcsrc sort by value) Z -ON (x.key = Z.key) -select sum(hash(Y.key,Y.value)) -PREHOOK: type: QUERY -PREHOOK: Input: default@orcsrc -#### A masked pattern was here #### -POSTHOOK: query: FROM -(SELECT orcsrc.* FROM orcsrc sort by key) x -LEFT OUTER JOIN -(SELECT orcsrc.* FROM orcsrc sort by value) Y -ON (x.key = Y.key) -LEFT OUTER JOIN -(SELECT orcsrc.* FROM orcsrc sort by value) Z -ON (x.key = Z.key) -select sum(hash(Y.key,Y.value)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@orcsrc -#### A masked pattern was here #### -348019368476 -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x LEFT OUTER JOIN @@ -1310,7 +1634,7 @@ RIGHT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x LEFT OUTER JOIN @@ -1345,6 +1669,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string) outputColumnNames: _col0 @@ -1356,11 
+1681,14 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1373,6 +1701,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 4 Map Operator Tree: TableScan @@ -1380,6 +1714,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 @@ -1391,11 +1726,16 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + output key column names: KEY.reducesinkkey0 + output value column names: VALUE._col0 Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + skipTag: false Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized, llap @@ -1409,6 +1749,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 5 Map Operator Tree: TableScan @@ -1416,6 +1762,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string) outputColumnNames: _col0 @@ -1427,11 +1774,14 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1444,6 +1794,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false 
vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1451,6 +1807,10 @@ STAGE PLANS: condition map: Left Outer Join 0 to 1 Right Outer Join 0 to 2 + keyExpressions: + 0 [Column[_col0]] + 1 [Column[_col0]] + 2 [Column[_col0]] keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -1464,16 +1824,28 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: + output value column names: VALUE._col0 + skipTag: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -1504,32 +1876,7 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: FROM -(SELECT orcsrc.* FROM orcsrc sort by key) x -LEFT OUTER JOIN -(SELECT orcsrc.* FROM orcsrc sort by value) Y -ON (x.key = Y.key) -RIGHT OUTER JOIN -(SELECT orcsrc.* FROM orcsrc sort by value) Z -ON (x.key = Z.key) -select sum(hash(Y.key,Y.value)) -PREHOOK: type: QUERY -PREHOOK: Input: default@orcsrc -#### A masked pattern was here #### -POSTHOOK: query: FROM -(SELECT orcsrc.* FROM orcsrc sort by key) x -LEFT OUTER JOIN -(SELECT orcsrc.* FROM orcsrc sort by value) Y -ON (x.key = Y.key) -RIGHT OUTER JOIN -(SELECT orcsrc.* FROM orcsrc sort by value) Z -ON (x.key = Z.key) -select sum(hash(Y.key,Y.value)) -POSTHOOK: type: QUERY -POSTHOOK: Input: default@orcsrc -#### A masked pattern was here #### -348019368476 -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x RIGHT OUTER JOIN @@ -1540,7 +1887,7 @@ RIGHT OUTER JOIN ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail debug FROM (SELECT orcsrc.* FROM orcsrc sort by key) x RIGHT OUTER JOIN @@ -1575,6 +1922,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string) outputColumnNames: _col0 @@ -1586,11 +1934,14 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false 
Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1603,6 +1954,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 4 Map Operator Tree: TableScan @@ -1610,6 +1967,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string), value (type: string) outputColumnNames: _col0, _col1 @@ -1621,11 +1979,16 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + output key column names: KEY.reducesinkkey0 + output value column names: VALUE._col0 Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + skipTag: false Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string) Execution mode: vectorized, llap @@ -1639,6 +2002,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 5 Map Operator Tree: TableScan @@ -1646,6 +2015,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] Select Operator expressions: key (type: string) outputColumnNames: _col0 @@ -1657,11 +2027,14 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1674,6 +2047,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Operator Tree: @@ -1681,6 +2060,10 @@ STAGE PLANS: condition map: Right Outer Join 0 to 1 Right Outer Join 0 to 2 + keyExpressions: + 0 [Column[_col0]] + 1 [Column[_col0]] + 2 [Column[_col0]] keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -1694,16 +2077,28 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reduce 
Output Operator sort order: + output value column names: VALUE._col0 + skipTag: false Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: sum(VALUE._col0) @@ -1734,28 +2129,1521 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: FROM +PREHOOK: query: explain vectorization detail debug +FROM (SELECT orcsrc.* FROM orcsrc sort by key) x -RIGHT OUTER JOIN +JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Y ON (x.key = Y.key) -RIGHT OUTER JOIN +FULL OUTER JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Z ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) PREHOOK: type: QUERY -PREHOOK: Input: default@orcsrc -#### A masked pattern was here #### -POSTHOOK: query: FROM +POSTHOOK: query: explain vectorization detail debug +FROM (SELECT orcsrc.* FROM orcsrc sort by key) x -RIGHT OUTER JOIN +JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +FULL OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + 
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + output key column names: KEY.reducesinkkey0 + output value column names: VALUE._col0 + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + skipTag: false + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 5 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + 
inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Inner Join 0 to 1 + Full Outer Join 0 to 2 + keyExpressions: + 0 [Column[_col0]] + 1 [Column[_col0]] + 2 [Column[_col0]] + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 192684 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + output value column names: VALUE._col0 + skipTag: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain vectorization detail debug +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +FULL OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +FULL OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail debug +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +FULL OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +FULL OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 
(SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + output key column names: KEY.reducesinkkey0 + output value column names: VALUE._col0 + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + skipTag: false + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + 
rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 5 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + Full Outer Join 0 to 2 + keyExpressions: + 0 [Column[_col0]] + 1 [Column[_col0]] + 2 [Column[_col0]] + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 192684 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + output value column names: VALUE._col0 + skipTag: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + 
projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain vectorization detail debug +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +FULL OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail debug +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +FULL OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 
175168 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + output key column names: KEY.reducesinkkey0 + output value column names: VALUE._col0 + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + skipTag: false + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 5 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition 
map: + Full Outer Join 0 to 1 + Left Outer Join 0 to 2 + keyExpressions: + 0 [Column[_col0]] + 1 [Column[_col0]] + 2 [Column[_col0]] + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 192684 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + output value column names: VALUE._col0 + skipTag: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain vectorization detail debug +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +FULL OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail debug +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +LEFT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +FULL OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: 
[0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + output key column names: KEY.reducesinkkey0 + output value column names: VALUE._col0 + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + skipTag: false + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 5 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: 
[0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Outer Join 0 to 1 + Full Outer Join 0 to 2 + keyExpressions: + 0 [Column[_col0]] + 1 [Column[_col0]] + 2 [Column[_col0]] + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 192684 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + output value column names: VALUE._col0 + skipTag: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain vectorization detail debug +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +FULL OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +RIGHT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail debug +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +FULL OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +RIGHT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + 
Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + output key column names: KEY.reducesinkkey0 + output value column names: VALUE._col0 + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + skipTag: false + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 5 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + Right Outer Join 0 to 2 + keyExpressions: + 0 [Column[_col0]] + 1 [Column[_col0]] + 2 [Column[_col0]] + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 192684 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: 
sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + output value column names: VALUE._col0 + skipTag: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain vectorization detail debug +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x +RIGHT OUTER JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Y ON (x.key = Y.key) +FULL OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Z +ON (x.key = Z.key) +select sum(hash(Y.key,Y.value)) +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail debug +FROM +(SELECT orcsrc.* FROM orcsrc sort by key) x RIGHT OUTER JOIN +(SELECT orcsrc.* FROM orcsrc sort by value) Y +ON (x.key = Y.key) +FULL OUTER JOIN (SELECT orcsrc.* FROM orcsrc sort by value) Z ON (x.key = Z.key) select sum(hash(Y.key,Y.value)) POSTHOOK: type: QUERY -POSTHOOK: Input: default@orcsrc +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### -348019368476 + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator 
+ key expressions: _col0 (type: string) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string), value (type: string) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + output key column names: KEY.reducesinkkey0 + output value column names: VALUE._col0 + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + skipTag: false + Statistics: Num rows: 500 Data size: 175168 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: string) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 5 + Map Operator Tree: + TableScan + alias: orcsrc + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:string, 1:value:string, 2:ROW__ID:struct] + Select Operator + expressions: key (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator 
+ key expressions: _col0 (type: string) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: string) + Reduce Sink Vectorization: + className: VectorReduceSinkStringOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false + Statistics: Num rows: 500 Data size: 87584 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:string, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Right Outer Join 0 to 1 + Full Outer Join 0 to 2 + keyExpressions: + 0 [Column[_col0]] + 1 [Column[_col0]] + 2 [Column[_col0]] + keys: + 0 _col0 (type: string) + 1 _col0 (type: string) + 2 _col0 (type: string) + outputColumnNames: _col2, _col3 + Statistics: Num rows: 1100 Data size: 192684 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(hash(_col2,_col3)) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + output value column names: VALUE._col0 + skipTag: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + diff --git ql/src/test/results/clientpositive/llap/vector_join_filters.q.out 
ql/src/test/results/clientpositive/llap/vector_join_filters.q.out index 4e5205f..04c506e 100644 --- ql/src/test/results/clientpositive/llap/vector_join_filters.q.out +++ ql/src/test/results/clientpositive/llap/vector_join_filters.q.out @@ -47,15 +47,215 @@ POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 4937935 Warning: Map Join MAPJOIN[16][bigTable=?] in task 'Map 2' is a cross product -PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL DEBUG +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY -PREHOOK: Input: default@myinput1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL DEBUG +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value POSTHOOK: type: QUERY -POSTHOOK: Input: default@myinput1 +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez #### A masked pattern was here #### -3080335 + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 0:int, val 40), FilterLongColGreaterLongScalar(col 1:int, val 50), FilterLongColEqualLongColumn(col 0:int, col 1:int)) + predicate: ((key = value) and (key > 40) and (value > 50)) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + output value column names: VALUE._col0, VALUE._col1 + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 1:int + skipTag: true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs 
+ Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keyContext: [types [], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + outer filter mappings: [null, [0, 3]] + valueContexts: [0:[types [int, int], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + filter predicates: + 0 + 1 {(_col0 > 40)} {(_col1 > 50)} {(_col0 = _col1)} + keyExpressions: + 0 [] + 1 [] + keys: + 0 + 1 + Map Join Vectorization: + bigTableValueExpressions: col 0:int, col 1:int + className: VectorMapJoinOuterFilteredOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: Outer Join has keys IS false + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 4 Data size: 68 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [4] + selectExpressions: VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3)) -> 4:int + Statistics: Num rows: 4 Data size: 68 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 4:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + output value column names: VALUE._col0 + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint + skipTag: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + 
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 @@ -128,42 +328,878 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 4937935 -PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL DEBUG +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY -PREHOOK: Input: default@myinput1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL DEBUG +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value POSTHOOK: type: QUERY -POSTHOOK: Input: default@myinput1 +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE 
PLANS: + Stage: Stage-1 + Tez #### A masked pattern was here #### -3080335 -PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value -PREHOOK: type: QUERY -PREHOOK: Input: default@myinput1 + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 0:int, val 40), FilterLongColGreaterLongScalar(col 1:int, val 50), FilterLongColEqualLongColumn(col 0:int, col 1:int)) + predicate: ((key = value) and (key > 40) and (value > 50)) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + output value column names: VALUE._col0 + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:int + skipTag: true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer 
Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + outer filter mappings: [null, [0, 3]] + valueContexts: [0:[types [int], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + filter predicates: + 0 + 1 {(_col0 > 40)} {(_col1 > 50)} {(_col0 = _col1)} + keyExpressions: + 0 [Column[_col0]] + 1 [Column[_col1]] + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + Map Join Vectorization: + bigTableFilterExpressions: FilterLongColGreaterLongScalar(col 0:int, val 40), FilterLongColGreaterLongScalar(col 1:int, val 50), FilterLongColEqualLongColumn(col 0:int, col 1:int) + bigTableKeyColumns: 1:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:int + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outerSmallTableKeyMapping: 1 -> 3 + projectedOutput: 3:int, 4:int, 0:int, 1:int + smallTableValueMapping: 4:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 4 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [5] + selectExpressions: VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3)) -> 5:int + Statistics: Num rows: 4 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 5:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + output value column names: VALUE._col0 + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint + skipTag: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, 
spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL DEBUG +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL DEBUG +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value POSTHOOK: type: QUERY -POSTHOOK: Input: default@myinput1 +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez #### A masked pattern was here #### -3080335 -PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value -PREHOOK: type: QUERY -PREHOOK: Input: default@myinput1 + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 0:int, val 40), FilterLongColGreaterLongScalar(col 1:int, val 50), FilterLongColEqualLongColumn(col 0:int, col 1:int)) + predicate: ((key = value) and (key > 40) and (value > 50)) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + 
className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + output value column names: VALUE._col0 + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:int + skipTag: true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + outer filter mappings: [null, [0, 3]] + valueContexts: [0:[types [int], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + filter predicates: + 0 + 1 {(_col0 > 40)} {(_col1 > 50)} {(_col0 = _col1)} + keyExpressions: + 0 [Column[_col0]] + 1 [Column[_col0]] + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableFilterExpressions: FilterLongColGreaterLongScalar(col 0:int, val 40), FilterLongColGreaterLongScalar(col 1:int, val 50), FilterLongColEqualLongColumn(col 0:int, col 1:int) + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:int + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outerSmallTableKeyMapping: 0 -> 3 + projectedOutput: 3:int, 4:int, 0:int, 1:int + smallTableValueMapping: 4:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 4 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) 
(type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [5] + selectExpressions: VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3)) -> 5:int + Statistics: Num rows: 4 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 5:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + output value column names: VALUE._col0 + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint + skipTag: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL DEBUG +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL DEBUG +SELECT 
sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value POSTHOOK: type: QUERY -POSTHOOK: Input: default@myinput1 +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez #### A masked pattern was here #### -3080335 -PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value -PREHOOK: type: QUERY -PREHOOK: Input: default@myinput1 + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 0:int, val 40), FilterLongColGreaterLongScalar(col 1:int, val 50), FilterLongColEqualLongColumn(col 0:int, col 1:int)) + predicate: ((key = value) and (key > 40) and (value > 50)) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + output value column names: VALUE._col0 + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int + skipTag: true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 4 Data 
size: 32 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + outer filter mappings: [null, [0, 3]] + valueContexts: [0:[types [int], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + filter predicates: + 0 + 1 {(_col0 > 40)} {(_col1 > 50)} {(_col0 = _col1)} + keyExpressions: + 0 [Column[_col1]] + 1 [Column[_col1]] + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + Map Join Vectorization: + bigTableFilterExpressions: FilterLongColGreaterLongScalar(col 0:int, val 40), FilterLongColGreaterLongScalar(col 1:int, val 50), FilterLongColEqualLongColumn(col 0:int, col 1:int) + bigTableKeyColumns: 1:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:int + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outerSmallTableKeyMapping: 1 -> 4 + projectedOutput: 3:int, 4:int, 0:int, 1:int + smallTableValueMapping: 3:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 4 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [5] + selectExpressions: VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3)) -> 5:int + Statistics: Num rows: 4 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 5:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + output value column names: VALUE._col0 + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint + skipTag: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: 
[] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL DEBUG +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL DEBUG +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value POSTHOOK: type: QUERY -POSTHOOK: Input: default@myinput1 +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez #### A masked pattern was here #### -3080335 + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 0:int, val 40), FilterLongColGreaterLongScalar(col 1:int, val 50), FilterLongColEqualLongColumn(col 0:int, col 1:int)) + predicate: ((key = value) and (key > 40) and (value > 50)) (type: boolean) + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + 
Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + output key column names: KEY.reducesinkkey0, KEY.reducesinkkey1 + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:int, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: true + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 4 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keyContext: [types [int, int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + outer filter mappings: [null, [0, 3]] + valueContexts: [0:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + filter predicates: + 0 + 1 {(_col0 > 40)} {(_col1 > 50)} {(_col0 = _col1)} + keyExpressions: + 0 [Column[_col0], Column[_col1]] + 1 [Column[_col0], Column[_col1]] + keys: + 0 _col0 (type: int), _col1 (type: int) + 1 _col0 (type: int), _col1 (type: int) + Map Join Vectorization: + bigTableFilterExpressions: FilterLongColGreaterLongScalar(col 0:int, val 40), FilterLongColGreaterLongScalar(col 1:int, val 50), FilterLongColEqualLongColumn(col 0:int, col 1:int) + bigTableKeyColumns: 0:int, 1:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:int + className: VectorMapJoinOuterMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outerSmallTableKeyMapping: 0 -> 3, 1 -> 4 + projectedOutput: 3:int, 4:int, 0:int, 1:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 4 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Select 
Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [5] + selectExpressions: VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3)) -> 5:int + Statistics: Num rows: 4 Data size: 35 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 5:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + output value column names: VALUE._col0 + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint + skipTag: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 
AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 diff --git ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out index 056360f..7bda1e8 100644 --- ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out +++ ql/src/test/results/clientpositive/llap/vector_join_nulls.q.out @@ -47,15 +47,208 @@ POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 13630578 Warning: Map Join MAPJOIN[14][bigTable=?] in task 'Map 2' is a cross product -PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL DEBUG +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b PREHOOK: type: QUERY -PREHOOK: Input: default@myinput1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL DEBUG +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b POSTHOOK: type: QUERY -POSTHOOK: Input: default@myinput1 +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez #### A masked pattern was here #### -13630578 + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + output value column names: VALUE._col0, VALUE._col1 + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 1:int + skipTag: true + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: 
[0:key:int, 1:value:int, 2:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keyContext: [types [], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + outer filter mappings: [null, [0, 1]] + valueContexts: [0:[types [int, int], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + filter predicates: + 0 + 1 {true} + keyExpressions: + 0 [] + 1 [] + keys: + 0 + 1 + Map Join Vectorization: + bigTableValueExpressions: col 0:int, col 1:int + className: VectorMapJoinOuterFilteredOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: Outer Join has keys IS false + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 9 Data size: 153 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [4] + selectExpressions: VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3)) -> 4:int + Statistics: Num rows: 9 Data size: 153 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 4:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + output value column names: VALUE._col0 + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint + skipTag: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: 
false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 @@ -128,42 +321,836 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 4542003 -PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL DEBUG +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value PREHOOK: type: QUERY -PREHOOK: Input: default@myinput1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL DEBUG +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.value POSTHOOK: type: QUERY -POSTHOOK: Input: default@myinput1 +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez #### A masked pattern was here #### -3079923 -PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key -PREHOOK: type: QUERY -PREHOOK: Input: default@myinput1 + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + output value column names: VALUE._col0 + Map-reduce 
partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:int + skipTag: true + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + outer filter mappings: [null, [0, 0]] + valueContexts: [0:[types [int], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[_col0]] + 1 [Column[_col1]] + keys: + 0 _col0 (type: int) + 1 _col1 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 1:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:int + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outerSmallTableKeyMapping: 1 -> 3 + projectedOutput: 3:int, 4:int, 0:int, 1:int + smallTableValueMapping: 4:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [5] + selectExpressions: VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3)) -> 5:int + Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 5:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: 
_col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + output value column names: VALUE._col0 + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint + skipTag: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL DEBUG +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL DEBUG +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key = b.key POSTHOOK: type: QUERY -POSTHOOK: Input: default@myinput1 +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez #### A masked pattern was here #### -4509891 -PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value -PREHOOK: type: QUERY -PREHOOK: Input: default@myinput1 + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) #### A masked 
pattern was here #### -POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + output value column names: VALUE._col0 + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:int + skipTag: true + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + outer filter mappings: [null, [0, 0]] + valueContexts: [0:[types [int], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[_col0]] + 1 [Column[_col0]] + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:int + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outerSmallTableKeyMapping: 0 -> 3 + projectedOutput: 3:int, 4:int, 0:int, 1:int + 
smallTableValueMapping: 4:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [5] + selectExpressions: VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3)) -> 5:int + Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 5:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + output value column names: VALUE._col0 + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint + skipTag: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL DEBUG +SELECT 
sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL DEBUG +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.value = b.value POSTHOOK: type: QUERY -POSTHOOK: Input: default@myinput1 +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez #### A masked pattern was here #### -3113558 -PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value -PREHOOK: type: QUERY -PREHOOK: Input: default@myinput1 + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### -POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + output value column names: VALUE._col0 + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int + skipTag: true + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keyContext: [types [int], 
serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + outer filter mappings: [null, [0, 0]] + valueContexts: [0:[types [int], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[_col1]] + 1 [Column[_col1]] + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 1:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:int + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outerSmallTableKeyMapping: 1 -> 4 + projectedOutput: 3:int, 4:int, 0:int, 1:int + smallTableValueMapping: 3:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [5] + selectExpressions: VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3)) -> 5:int + Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 5:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + output value column names: VALUE._col0 + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint + skipTag: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By 
Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL DEBUG + +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL DEBUG + +SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key=b.key and a.value = b.value POSTHOOK: type: QUERY -POSTHOOK: Input: default@myinput1 +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### -3079923 + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: int) + sort order: ++ + output key column names: KEY.reducesinkkey0, KEY.reducesinkkey1 + Map-reduce partition columns: _col0 (type: int), _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:int, 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: true + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 2 + Map Operator Tree: + TableScan + alias: b + 
Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:ROW__ID:struct] + Select Operator + expressions: key (type: int), value (type: int) + outputColumnNames: _col0, _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 3 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keyContext: [types [int, int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + outer filter mappings: [null, [0, 0]] + valueContexts: [0:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[_col0], Column[_col1]] + 1 [Column[_col0], Column[_col1]] + keys: + 0 _col0 (type: int), _col1 (type: int) + 1 _col0 (type: int), _col1 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int, 1:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:int + className: VectorMapJoinOuterMultiKeyOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outerSmallTableKeyMapping: 0 -> 3, 1 -> 4 + projectedOutput: 3:int, 4:int, 0:int, 1:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col2, _col3 + input vertices: + 0 Map 1 + Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: hash(_col0,_col1,_col2,_col3) (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [5] + selectExpressions: VectorUDFAdaptor(hash(_col0,_col1,_col2,_col3)) -> 5:int + Statistics: Num rows: 3 Data size: 26 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(_col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 5:int) -> bigint + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [0] + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + output value column names: VALUE._col0 + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:bigint + skipTag: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: true + vectorized: true + rowBatchContext: + 
dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, bigint] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: + reduceColumnSortOrder: + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: VALUE._col0:bigint + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + Group By Vectorization: + aggregators: VectorUDAFSumLong(col 0:bigint) -> bigint + className: VectorGroupByOperator + groupByMode: MERGEPARTIAL + native: false + vectorProcessingMode: GLOBAL + projectedOutputColumnNums: [0] + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value) PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 diff --git ql/src/test/results/clientpositive/llap/vector_left_outer_join2.q.out ql/src/test/results/clientpositive/llap/vector_left_outer_join2.q.out index 821ea3a..1775c1d 100644 --- ql/src/test/results/clientpositive/llap/vector_left_outer_join2.q.out +++ ql/src/test/results/clientpositive/llap/vector_left_outer_join2.q.out @@ -65,6 +65,7 @@ POSTHOOK: Output: default@tjoin1 POSTHOOK: Lineage: tjoin1.c1 SIMPLE [(tjoin1stage)tjoin1stage.FieldSchema(name:c1, type:int, comment:null), ] POSTHOOK: Lineage: tjoin1.c2 EXPRESSION [(tjoin1stage)tjoin1stage.FieldSchema(name:c2, type:char(2), comment:null), ] POSTHOOK: Lineage: tjoin1.rnum SIMPLE [(tjoin1stage)tjoin1stage.FieldSchema(name:rnum, type:int, comment:null), ] +_col0 _col1 _col2 PREHOOK: query: INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE PREHOOK: type: QUERY PREHOOK: Input: default@tjoin2stage @@ -76,12 +77,14 @@ POSTHOOK: Output: default@tjoin2 POSTHOOK: Lineage: tjoin2.c1 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c1, type:int, comment:null), ] POSTHOOK: Lineage: tjoin2.c2 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c2, type:char(2), comment:null), ] POSTHOOK: Lineage: tjoin2.rnum SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:rnum, type:int, comment:null), ] -PREHOOK: query: explain vectorization expression +tjoin2stage.rnum tjoin2stage.c1 tjoin2stage.c2 +PREHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and 
tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: false enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] @@ -167,15 +170,17 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tjoin1 POSTHOOK: Input: default@tjoin2 #### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c2j2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: false enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] @@ -261,15 +266,17 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tjoin1 POSTHOOK: Input: default@tjoin2 #### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c2j2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -293,6 +300,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct] Select Operator expressions: rnum (type: int), c1 (type: int), c2 (type: int) outputColumnNames: _col0, _col1, _col2 @@ -350,6 +358,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: rnum:int, c1:int, c2:int + partitionColumnCount: 0 + scratchColumnTypeNames: [string] Map 2 Map Operator Tree: TableScan @@ -357,6 +371,7 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:char(2), 3:ROW__ID:struct] Select Operator expressions: c1 (type: int), c2 (type: char(2)) outputColumnNames: _col0, _col1 @@ -371,8 +386,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator + keyColumns: 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:char(2) Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: char(2)) Execution mode: vectorized, llap @@ -386,6 +403,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false 
vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [1, 2] + dataColumns: rnum:int, c1:int, c2:char(2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -403,15 +426,17 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tjoin1 POSTHOOK: Input: default@tjoin2 #### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c2j2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -435,6 +460,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct] Select Operator expressions: rnum (type: int), c1 (type: int), c2 (type: int) outputColumnNames: _col0, _col1, _col2 @@ -492,6 +518,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: rnum:int, c1:int, c2:int + partitionColumnCount: 0 + scratchColumnTypeNames: [string] Map 2 Map Operator Tree: TableScan @@ -499,6 +531,7 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:char(2), 3:ROW__ID:struct] Select Operator expressions: c1 (type: int), c2 (type: char(2)) outputColumnNames: _col0, _col1 @@ -513,8 +546,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator + keyColumns: 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:char(2) Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: char(2)) Execution mode: vectorized, llap @@ -528,6 +563,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [1, 2] + dataColumns: rnum:int, c1:int, c2:char(2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -545,15 +586,17 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tjoin1 POSTHOOK: Input: default@tjoin2 #### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c2j2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization expression +POSTHOOK: query: explain vectorization detail select 
tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -577,6 +620,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct] Select Operator expressions: rnum (type: int), c1 (type: int), c2 (type: int) outputColumnNames: _col0, _col1, _col2 @@ -595,9 +639,16 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: + bigTableFilterExpressions: FilterLongColGreaterLongScalar(col 2:int, val 15) + bigTableKeyColumns: 1:int + bigTableRetainColumnNums: [0, 1, 2] + bigTableValueColumns: 0:int, 1:int, 2:int className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:int, 1:int, 2:int, 4:char(2) + smallTableValueMapping: 4:char(2) + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col4 input vertices: 1 Map 2 @@ -631,6 +682,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: rnum:int, c1:int, c2:int + partitionColumnCount: 0 + scratchColumnTypeNames: [string] Map 2 Map Operator Tree: TableScan @@ -638,6 +695,7 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:char(2), 3:ROW__ID:struct] Select Operator expressions: c1 (type: int), c2 (type: char(2)) outputColumnNames: _col0, _col1 @@ -652,8 +710,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator + keyColumns: 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:char(2) Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: char(2)) Execution mode: vectorized, llap @@ -667,6 +727,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [1, 2] + dataColumns: rnum:int, c1:int, c2:char(2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -684,15 +750,17 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tjoin1 POSTHOOK: Input: default@tjoin2 #### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c2j2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL -PREHOOK: query: explain vectorization expression +PREHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) PREHOOK: type: QUERY -POSTHOOK: query: explain 
vectorization expression +POSTHOOK: query: explain vectorization detail select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 ) POSTHOOK: type: QUERY +Explain PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] @@ -716,6 +784,7 @@ STAGE PLANS: Statistics: Num rows: 3 Data size: 36 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct] Select Operator expressions: rnum (type: int), c1 (type: int), c2 (type: int) outputColumnNames: _col0, _col1, _col2 @@ -734,9 +803,16 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: + bigTableFilterExpressions: FilterLongColGreaterLongScalar(col 2:int, val 15) + bigTableKeyColumns: 1:int + bigTableRetainColumnNums: [0, 1, 2] + bigTableValueColumns: 0:int, 1:int, 2:int className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:int, 1:int, 2:int, 4:char(2) + smallTableValueMapping: 4:char(2) + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col4 input vertices: 1 Map 2 @@ -770,6 +846,12 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: rnum:int, c1:int, c2:int + partitionColumnCount: 0 + scratchColumnTypeNames: [string] Map 2 Map Operator Tree: TableScan @@ -777,6 +859,7 @@ STAGE PLANS: Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:char(2), 3:ROW__ID:struct] Select Operator expressions: c1 (type: int), c2 (type: char(2)) outputColumnNames: _col0, _col1 @@ -791,8 +874,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator + keyColumns: 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:char(2) Statistics: Num rows: 4 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: char(2)) Execution mode: vectorized, llap @@ -806,6 +891,12 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [1, 2] + dataColumns: rnum:int, c1:int, c2:char(2) + partitionColumnCount: 0 + scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -823,6 +914,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@tjoin1 POSTHOOK: Input: default@tjoin2 #### A masked pattern was here #### +tjoin1.rnum tjoin1.c1 tjoin1.c2 c2j2 0 10 15 NULL 1 20 25 NULL 2 NULL 50 NULL diff --git ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out index 17704e5..a778a30 100644 --- 
ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vector_leftsemi_mapjoin.q.out @@ -595,6 +595,57 @@ NULL NULL NULL PREHOOK: query: explain vectorization only summary +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization only summary +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +PREHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +10 +10 +10 +10 +4 +4 +8 +8 +NULL +NULL +NULL +PREHOOK: query: explain vectorization only summary select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY POSTHOOK: query: explain vectorization only summary @@ -763,6 +814,70 @@ NULL NULL NULL PREHOOK: query: explain vectorization only summary +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization only summary +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +PREHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +10 +10 +10 +10 +10 +10 +10 +10 +10 +10 +10 +10 +2 +4 +4 +5 +5 +5 +8 +8 +9 +NULL +NULL +NULL +PREHOOK: query: explain vectorization only summary select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key PREHOOK: type: QUERY POSTHOOK: query: explain vectorization only summary @@ -1964,25 +2079,32 @@ STAGE PLANS: Map Join Operator condition map: Inner Join 0 to 1 - Left Semi Join 1 to 2 keys: 0 key (type: int) 1 key (type: int) - 2 _col0 (type: int) outputColumnNames: _col0, _col1, _col5, _col6 input vertices: 1 Map 3 - 2 Map 4 - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 46 Data size: 
184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: string) + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 4 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: string) Execution mode: llap LLAP IO: all inputs Map 3 @@ -2031,10 +2153,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2222,20 +2344,27 @@ STAGE PLANS: Map Join Operator condition map: Left Semi Join 0 to 1 - Left Semi Join 0 to 2 keys: 0 key (type: int) 1 _col0 (type: int) - 2 _col0 (type: int) outputColumnNames: _col0 input vertices: 1 Map 3 - 2 Map 4 - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs Map 3 @@ -2292,10 +2421,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2364,20 +2493,30 @@ STAGE PLANS: Map Join Operator condition map: Left Outer Join 0 to 1 - Left Semi Join 1 to 2 keys: 0 key (type: int) 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 + outputColumnNames: _col0, _col5 input vertices: 1 Map 3 - 2 Map 4 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs Map 3 @@ -2397,20 +2536,23 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -2419,10 +2561,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2491,7 +2633,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -2524,20 +2666,23 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: 
_col0 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -2545,28 +2690,39 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 - Left Semi Join 1 to 2 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col5 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 5 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2620,10 +2776,10 @@ NULL NULL NULL PREHOOK: query: explain vectorization summary -select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY POSTHOOK: query: explain vectorization summary -select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: false @@ -2638,7 +2794,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 
(SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -2646,33 +2802,24 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: key (type: int) sort order: + Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs Map 5 @@ -2680,11 +2827,23 @@ STAGE PLANS: TableScan alias: c Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -2692,28 +2851,39 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 - Left Outer Join 0 to 2 + Full Outer Join 0 to 1 keys: 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + 1 key (type: int) + outputColumnNames: _col0, _col5 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + 
Left Semi Join 0 to 1 + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 5 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: llap Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2725,13 +2895,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -2759,18 +2929,18 @@ POSTHOOK: Input: default@t3 10 10 10 -16 -18 -20 4 4 8 8 +NULL +NULL +NULL PREHOOK: query: explain vectorization summary -select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY POSTHOOK: query: explain vectorization summary -select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: false @@ -2785,8 +2955,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 @@ -2794,35 +2964,60 @@ STAGE PLANS: TableScan alias: a Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 3 + Statistics: 
Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs - Map 4 + Map 3 Map Operator Tree: TableScan alias: b Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 + Filter Operator + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - keys: _col0 (type: int) - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs - Map 5 + Map 4 Map Operator Tree: TableScan alias: c @@ -2837,30 +3032,13 @@ STAGE PLANS: Reducer 2 Execution mode: llap Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - Right Outer Join 0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE - Reducer 3 - Execution mode: llap - Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2872,13 +3050,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on 
a.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -2906,20 +3084,18 @@ POSTHOOK: Input: default@t3 10 10 10 +16 +18 +20 4 4 8 8 -NULL -NULL -NULL -NULL -NULL PREHOOK: query: explain vectorization summary -select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY POSTHOOK: query: explain vectorization summary -select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: false @@ -2989,7 +3165,7 @@ STAGE PLANS: Merge Join Operator condition map: Left Semi Join 0 to 1 - Outer Join 0 to 2 + Right Outer Join 0 to 2 keys: 0 key (type: int) 1 _col0 (type: int) @@ -3021,13 +3197,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -3055,31 +3231,20 @@ POSTHOOK: Input: default@t3 10 10 10 -10 -10 -10 -10 -10 -10 -10 -10 -2 4 4 -5 -5 -5 8 8 -9 +NULL +NULL NULL NULL NULL PREHOOK: query: explain vectorization summary -select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY POSTHOOK: query: explain vectorization summary -select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: false @@ -3094,45 +3259,37 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Map 1 <- Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: key is not null (type: boolean) - Statistics: Num rows: 21 Data size: 3948 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Semi Join 0 to 1 keys: 0 key (type: int) 1 _col0 (type: int) - outputColumnNames: _col0, _col1 + outputColumnNames: _col0 input 
vertices: - 1 Map 3 - Statistics: Num rows: 23 Data size: 4342 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Outer Join 0 to 1 - keys: - 0 _col1 (type: string) - 1 value (type: string) - outputColumnNames: _col0 - input vertices: - 1 Map 4 - Statistics: Num rows: 25 Data size: 4776 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 25 Data size: 4776 Basic stats: COMPLETE Column stats: NONE + 1 Map 4 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs - Map 3 + Map 4 Map Operator Tree: TableScan alias: b @@ -3156,28 +3313,43 @@ STAGE PLANS: Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs - Map 4 + Map 5 Map Operator Tree: TableScan alias: c - Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: value (type: string) + key expressions: key (type: int) sort order: + - Map-reduce partition columns: value (type: string) - Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: llap LLAP IO: all inputs Reducer 2 Execution mode: llap Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: Select Operator expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 - Statistics: Num rows: 25 Data size: 4776 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 25 Data size: 4776 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3189,13 +3361,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +PREHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +POSTHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key POSTHOOK: 
type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -3229,18 +3401,25 @@ POSTHOOK: Input: default@t3 10 10 10 -16 -18 -20 +10 +10 +2 4 4 +5 +5 +5 8 8 +9 +NULL +NULL +NULL PREHOOK: query: explain vectorization summary -select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY POSTHOOK: query: explain vectorization summary -select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: false @@ -3255,17 +3434,299 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 2 (BROADCAST_EDGE) + Map 1 <- Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((key > 100) and value is not null) (type: boolean) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 5 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + Map-reduce partition columns: key (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col0 (type: int) + 1 key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 101 Basic stats: 
COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain vectorization summary +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization summary +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 3948 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keys: + 0 key (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 23 Data size: 4342 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: string) + 1 value (type: string) + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 25 Data size: 4776 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Statistics: Num rows: 25 Data size: 4776 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + Map-reduce partition columns: _col0 (type: int) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Map 4 + Map Operator Tree: + TableScan + alias: c + 
Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: value (type: string) + sort order: + + Map-reduce partition columns: value (type: string) + Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 25 Data size: 4776 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 25 Data size: 4776 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +10 +10 +10 +10 +10 +10 +10 +10 +10 +10 +16 +18 +20 +4 +4 +8 +8 +PREHOOK: query: explain vectorization summary +select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization summary +select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: false + enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 2 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((key > 100) and value is not null) (type: boolean) + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 @@ -3359,6 +3820,7 @@ STAGE PLANS: className: VectorFilterOperator native: true Map Join Vectorization: + bigTableKeyExpressions: col 0:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -3472,6 +3934,7 @@ STAGE PLANS: className: VectorFilterOperator native: true Map Join Vectorization: + bigTableKeyExpressions: col 0:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS 
true, Optimized Table and Supports Key Types IS true @@ -3587,6 +4050,7 @@ STAGE PLANS: className: VectorFilterOperator native: true Map Join Vectorization: + bigTableKeyExpressions: col 0:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -3694,6 +4158,7 @@ STAGE PLANS: className: VectorFilterOperator native: true Map Join Vectorization: + bigTableKeyExpressions: col 0:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -3815,6 +4280,7 @@ STAGE PLANS: className: VectorFilterOperator native: true Map Join Vectorization: + bigTableKeyExpressions: col 0:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -3955,6 +4421,7 @@ STAGE PLANS: className: VectorFilterOperator native: true Map Join Vectorization: + bigTableKeyExpressions: col 0:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -4060,23 +4527,245 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - Map 2 + Map 2 + Map Operator Tree: + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Select Vectorization: + className: VectorSelectOperator + native: true + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce 
Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + + Stage: Stage-0 + Fetch Operator + +PREHOOK: query: select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +PREHOOK: query: explain vectorization only operator +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization only operator +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) + Vertices: + Map 1 + Map Operator Tree: + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Map 2 + Map Operator Tree: + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + 
enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + + Stage: Stage-0 + Fetch Operator + +PREHOOK: query: select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +10 val_5 +10 val_5 +10 val_5 +4 val_2 +8 val_4 +PREHOOK: query: explain vectorization only operator +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization only operator +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Vertices: + Map 1 + Map Operator Tree: + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Map 3 Map Operator Tree: TableScan Vectorization: native: true Filter Vectorization: className: VectorFilterOperator native: true - Map Join Vectorization: - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, 
hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false - Select Vectorization: - className: VectorSelectOperator - native: true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator + className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap @@ -4090,7 +4779,7 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - Reducer 3 + Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -4109,21 +4798,40 @@ STAGE PLANS: Stage: Stage-0 Fetch Operator -PREHOOK: query: select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +PREHOOK: query: select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 -PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +POSTHOOK: query: select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 -POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 #### A masked pattern was here #### +0 +0 +0 +0 +0 +0 +10 +10 +10 +10 +2 +4 +4 +5 +5 +5 +8 +8 +9 PREHOOK: query: explain vectorization only operator -select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value PREHOOK: type: QUERY POSTHOOK: query: explain vectorization only operator -select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -4137,8 +4845,8 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) Vertices: Map 1 Map Operator Tree: @@ -4147,18 +4855,16 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - Select Vectorization: - className: VectorSelectOperator - native: true - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - native: false - vectorProcessingMode: HASH - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS 
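With the join key written as the expression 2*b.key, the arithmetic is evaluated and deduplicated on the small side, while the big table still probes on its plain column, as the bigTableKeyExpressions: col 0:int entries in these hunks record. A hypothetical IN rewrite, again only to illustrate the semantics being planned:

    -- Illustration (assumed equivalent): match a.key against 2*key of t2.
    select * from t1 a
    where a.key in (select 2 * key from t2)
    sort by a.key, a.value;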
true + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -4170,22 +4876,25 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - Map 2 + Map 3 Map Operator Tree: TableScan Vectorization: native: true Filter Vectorization: className: VectorFilterOperator native: true - Map Join Vectorization: - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -4197,7 +4906,7 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - Reducer 3 + Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -4216,26 +4925,25 @@ STAGE PLANS: Stage: Stage-0 Fetch Operator -PREHOOK: query: select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +PREHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 #### A masked pattern was here #### -POSTHOOK: query: select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +POSTHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 #### A masked pattern was here #### -10 val_5 -10 val_5 -10 val_5 -4 val_2 -8 val_4 +0 val_0 +0 val_0 +0 val_0 +8 val_8 PREHOOK: query: explain vectorization only 
operator -select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value PREHOOK: type: QUERY POSTHOOK: query: explain vectorization only operator -select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -4249,7 +4957,7 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) Vertices: Map 1 @@ -4260,14 +4968,24 @@ STAGE PLANS: className: VectorFilterOperator native: true Map Join Vectorization: + bigTableKeyExpressions: col 0:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Map Join Vectorization: + bigTableKeyExpressions: col 2:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Select Vectorization: + className: VectorSelectOperator + native: true + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -4286,6 +5004,28 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan Vectorization: + native: true + Filter Vectorization: + className: VectorFilterOperator + native: true Select Vectorization: className: VectorSelectOperator native: 
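The three-way query (t1 joined to t2, then semi-joined to t3) compiles into a single vertex: Map 1 now receives two broadcast edges and carries two chained Map Join Vectorization entries, the first probing on col 0:int and the second on col 2:int of the already-joined row. A sketch of reproducing this shape, assuming the usual auto-conversion setting (hive.auto.convert.join is an assumption of this sketch, not something shown in this test):

    -- Hypothetical session preamble; with join auto-conversion on, both
    -- small sides broadcast into Map 1 as in the plan above.
    set hive.auto.convert.join=true;
    explain vectorization only operator
    select * from t1 a join t2 b on a.key = b.key
    left semi join t3 c on b.key = c.key
    sort by a.key, a.value;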
true @@ -4328,40 +5068,37 @@ STAGE PLANS: Stage: Stage-0 Fetch Operator -PREHOOK: query: select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +PREHOOK: query: select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value PREHOOK: type: QUERY PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +POSTHOOK: query: select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 POSTHOOK: Input: default@t3 #### A masked pattern was here #### -0 -0 -0 -0 -0 -0 -10 -10 -10 -10 -2 -4 -4 -5 -5 -5 -8 -8 -9 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +10 val_10 10 val_5 +10 val_10 10 val_5 +10 val_10 10 val_5 +4 val_4 4 val_2 +8 val_8 8 val_4 PREHOOK: query: explain vectorization only operator -select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value PREHOOK: type: QUERY POSTHOOK: query: explain vectorization only operator -select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -4386,6 +5123,7 @@ STAGE PLANS: className: VectorFilterOperator native: true Map Join Vectorization: + bigTableKeyExpressions: col 0:int, col 1:string className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -4421,7 +5159,7 @@ STAGE PLANS: native: false vectorProcessingMode: HASH Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + className: VectorReduceSinkMultiKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap @@ -4454,25 +5192,35 @@ STAGE PLANS: Stage: Stage-0 Fetch Operator -PREHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +PREHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value PREHOOK: type: QUERY PREHOOK: Input: default@t1 -PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +POSTHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 -POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 #### A masked pattern was here #### 0 val_0 0 val_0 0 val_0 +0 val_0 +0 val_0 +0 val_0 +10 val_10 +2 val_2 +4 val_4 +5 val_5 +5 val_5 +5 val_5 8 val_8 +9 val_9 PREHOOK: query: explain vectorization 
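The switch from VectorReduceSinkLongOperator to VectorReduceSinkMultiKeyOperator is the reduce-sink key-type specialization at work: a single int key gets the Long-specialized sink, while the composite (int, string) key of on a.key = b.key and a.value=b.value (bigTableKeyExpressions: col 0:int, col 1:string) falls back to the generic multi-key variant. Both queries come from this test; only the comments are added:

    -- Single int key: plan shows VectorReduceSinkLongOperator.
    explain vectorization only operator
    select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b
    on a.key = b.key sort by a.key;
    -- Composite (int, string) key: plan shows VectorReduceSinkMultiKeyOperator.
    explain vectorization only operator
    select * from t3 a left semi join t1 b
    on a.key = b.key and a.value=b.value sort by a.key, a.value;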
only operator -select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY POSTHOOK: query: explain vectorization only operator -select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -4497,13 +5245,17 @@ STAGE PLANS: className: VectorFilterOperator native: true Map Join Vectorization: + bigTableKeyExpressions: col 0:int className: VectorMapJoinOperator native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false - Select Vectorization: - className: VectorSelectOperator - native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator native: true @@ -4526,10 +5278,18 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -4538,7 +5298,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true + allNative: false usesVectorUDFAdaptor: false vectorized: true Map 4 @@ -4590,37 +5350,37 @@ STAGE PLANS: Stage: Stage-0 Fetch Operator -PREHOOK: query: select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value 
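This hunk also clears a native check: the old plan folded both semi joins into one multi-condition map join, tripping One MapJoin Condition IS false, while the new plan splits them into two single-condition VectorMapJoinOperator stages, so the only remaining blocker is the disabled native flag. The hinted query, with comments added for orientation:

    -- From this test: two semi joins on a.key, now planned as two chained
    -- single-condition map joins inside one map vertex.
    explain vectorization only operator
    select /*+ mapjoin(b, c) */ a.key from t3 a
    left semi join t1 b on a.key = b.key
    left semi join t2 c on a.key = c.key
    sort by a.key;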
+PREHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +POSTHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 POSTHOOK: Input: default@t3 #### A masked pattern was here #### -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -10 val_10 10 val_5 -10 val_10 10 val_5 -10 val_10 10 val_5 -4 val_4 4 val_2 -8 val_8 8 val_4 +0 +0 +0 +0 +0 +0 +10 +10 +10 +10 +4 +4 +8 +8 PREHOOK: query: explain vectorization only operator -select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY POSTHOOK: query: explain vectorization only operator -select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -4634,25 +5394,32 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) Vertices: Map 1 Map Operator Tree: TableScan Vectorization: native: true - Filter Vectorization: - className: VectorFilterOperator - native: true - Map Join Vectorization: - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Filter Vectorization: + className: VectorFilterOperator + native: true + Map Join Vectorization: + bigTableKeyExpressions: col 1:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, 
Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -4668,6 +5435,25 @@ STAGE PLANS: Map Operator Tree: TableScan Vectorization: native: true + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + Map 4 + Map Operator Tree: + TableScan Vectorization: + native: true Filter Vectorization: className: VectorFilterOperator native: true @@ -4680,7 +5466,7 @@ STAGE PLANS: native: false vectorProcessingMode: HASH Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator + className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap @@ -4713,35 +5499,49 @@ STAGE PLANS: Stage: Stage-0 Fetch Operator -PREHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +PREHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +POSTHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 POSTHOOK: Input: default@t3 #### A masked pattern was here #### -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -10 val_10 -2 val_2 -4 val_4 -5 val_5 -5 val_5 -5 val_5 -8 val_8 -9 val_9 -PREHOOK: query: explain vectorization only operator -select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +10 +10 +10 +10 +4 +4 +8 +8 +PREHOOK: query: explain vectorization detail debug +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization only operator -select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c 
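When the LEFT OUTER JOIN converts to a map join, the native checklist grows an extra entry, Outer Join has keys IS true, and the operator order inside Map 1 shifts so that the outer join runs first, followed by the filter and then the semi join. Worth watching in the per-vertex rollups: allNative reports true only while every operator in a vertex is native, so any non-native VectorMapJoinOperator or hash-mode VectorGroupByOperator drops it to false. The test query, repeated with that reading in mind:

    -- From this test; compare each vertex's Map Vectorization rollup
    -- (allNative) against the operators listed above it.
    explain vectorization only operator
    select a.key from t3 a left outer join t1 b on a.key = b.key
    left semi join t2 c on b.key = c.key
    sort by a.key;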
on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail debug +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -4754,26 +5554,32 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - Filter Vectorization: - className: VectorFilterOperator + vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] + Reduce Output Operator + key expressions: key (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int native: true - Map Join Vectorization: - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -4782,28 +5588,35 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true - Map 3 + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - Filter Vectorization: - className: VectorFilterOperator + vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] + Reduce Output Operator + key expressions: key (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int native: true - Select Vectorization: - className: VectorSelectOperator - native: true - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - native: false - 
vectorProcessingMode: HASH - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -4812,28 +5625,62 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true - Map 4 + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 5 Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] + Filter Operator Filter Vectorization: className: VectorFilterOperator native: true + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH + keyExpressions: col 0:int native: false vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -4845,32 +5692,102 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + outer filter mappings: [[1, 0], [0, 0]] + keyExpressions: + 0 [Column[key]] + 1 [Column[key]] + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, 
_col5 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[_col5]] + 1 [Column[_col0]] + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 5 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + skipTag: false + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator + limit: -1 + Processor Tree: + ListSink -PREHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +PREHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -4882,6 +5799,18 @@ POSTHOOK: Input: default@t3 0 0 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 10 10 10 @@ -4890,11 +5819,14 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain vectorization only operator -select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +NULL +NULL +NULL +PREHOOK: query: explain vectorization detail debug +select a.key from t1 a full outer join t3 b on 
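In the detail-level plan above, the FULL OUTER JOIN itself is the one operator left in row mode: Reducer 2 runs it as a Merge Join and reports enableConditionsNotMet: Vectorizing MergeJoin Supported IS false, with the semi join applied immediately afterwards as a broadcast map join inside the same reducer. A hypothetical probe, assuming the same session, to confirm that the surrounding reduce-side vectorization (gated by hive.vectorized.execution.reduce.enabled) is independent of that row-mode join:

    -- Hypothetical check: disabling reduce-side vectorization should only
    -- alter the Reduce Vectorization blocks; the merge join was already
    -- running in row mode.
    set hive.vectorized.execution.reduce.enabled=false;
    explain vectorization detail debug
    select a.key from t1 a full outer join t3 b on a.key = b.key
    left semi join t2 c on b.key = c.key sort by a.key;
    set hive.vectorized.execution.reduce.enabled=true;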
a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization only operator -select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail debug +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -4907,23 +5839,76 @@ STAGE DEPENDENCIES: STAGE PLANS: Stage: Stage-1 Tez +#### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Map 1 <- Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Map 7 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Map 1 (SIMPLE_EDGE), Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS) +#### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[key]] + keys: + 0 key (type: int) + 1 key (type: int) Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 0:int className: VectorMapJoinOperator native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col5 + input vertices: + 1 Map 6 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1:int) + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col5 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + output value column names: VALUE._col0 + Map-reduce partition columns: _col5 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int + skipTag: false + Statistics: Num rows: 24 Data size: 96 Basic 
stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -4935,14 +5920,43 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - Map 3 + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Map 6 Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true + vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] + Reduce Output Operator + key expressions: key (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: key (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: true + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: true + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -4954,22 +5968,59 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Map 4 + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 7 Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true - Select Vectorization: - className: VectorSelectOperator + vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - native: false - vectorProcessingMode: HASH - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS 
true + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -4981,68 +6032,151 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - Reducer 2 + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Semi Join 0 to 1 + keyExpressions: + 0 [Column[_col5]] + 1 [Column[_col0]] + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + skipTag: false + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - - Stage: Stage-0 - Fetch Operator - -PREHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Input: 
default@t2 -PREHOOK: Input: default@t3 -#### A masked pattern was here #### -POSTHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Input: default@t2 -POSTHOOK: Input: default@t3 -#### A masked pattern was here #### -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -10 -10 -10 -10 -4 -4 -8 -8 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[key]] + keys: + 0 key (type: int) + 1 key (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col5 + input vertices: + 1 Map 6 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + fullOuterIntersect: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1:int) + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col5 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + output value column names: VALUE._col0 + Map-reduce partition columns: _col5 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int + skipTag: false + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Union 2 + Vertex: Union 2 + + Stage: Stage-0 + 
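The converted plan just shown assembles the full outer result through a union instead of a single merge-join reducer: Map 1 and Reducer 5 each run the Full Outer Map Join against broadcast Map 6, the Reducer 5 instance marked fullOuterIntersect: true, and both branches feed Union 2 ahead of the semi join and final sort. A hypothetical sanity check, assuming the same session, that the union-assembled plan returns the same rows as the merge-join plan:

    -- Hypothetical: count total rows and non-null keys of the query under
    -- both plan shapes and compare the pairs.
    select count(*), count(key) from (
      select a.key as key
      from t1 a full outer join t3 b on a.key = b.key
      left semi join t2 c on b.key = c.key
    ) q;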
Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: explain vectorization only operator -select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY POSTHOOK: query: explain vectorization only operator -select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -5056,17 +6190,32 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) Vertices: Map 1 Map Operator Tree: TableScan Vectorization: native: true - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -5075,17 +6224,28 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true + allNative: false usesVectorUDFAdaptor: false vectorized: true - Map 4 + Map 3 Map Operator Tree: TableScan Vectorization: native: true - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Select Vectorization: + className: VectorSelectOperator + native: true + 
Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -5094,25 +6254,17 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true + allNative: false usesVectorUDFAdaptor: false vectorized: true - Map 5 + Map 4 Map Operator Tree: TableScan Vectorization: native: true - Select Vectorization: - className: VectorSelectOperator + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator native: true - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - native: false - vectorProcessingMode: HASH - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -5121,11 +6273,10 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true Reducer 2 - Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -5144,13 +6295,13 @@ STAGE PLANS: Stage: Stage-0 Fetch Operator -PREHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -5178,18 +6329,18 @@ POSTHOOK: Input: default@t3 10 10 10 +16 +18 +20 4 4 8 8 -NULL -NULL -NULL PREHOOK: query: explain vectorization only operator -select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY POSTHOOK: query: explain vectorization only operator -select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +select a.key from t3 a left semi join t2 b on a.key = b.key right outer 
join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -5272,6 +6423,9 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -5291,13 +6445,13 @@ STAGE PLANS: Stage: Stage-0 Fetch Operator -PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -5325,18 +6479,20 @@ POSTHOOK: Input: default@t3 10 10 10 -16 -18 -20 4 4 8 8 +NULL +NULL +NULL +NULL +NULL PREHOOK: query: explain vectorization only operator -select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY POSTHOOK: query: explain vectorization only operator -select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -5350,17 +6506,27 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Map 1 <- Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) Vertices: Map 1 Map Operator Tree: TableScan Vectorization: native: true - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -5369,25 +6535,28 @@ STAGE PLANS: 
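Flipping the outer join from LEFT to RIGHT (and, in the next test, to FULL) changes which rows survive unmatched: the five NULL keys that appear in place of 16, 18 and 20 in the result diff are t1 rows preserved by the RIGHT OUTER JOIN that found no semi-joined t3 partner, and the outer join again lands in a row-mode merge-join reducer (Vectorizing MergeJoin Supported IS false) while the semi join keeps its broadcast map join. An illustrative variant, assuming the same tables, that makes the provenance of those NULLs visible:

    -- Illustration only: carry the preserved right-side key alongside a.key
    -- so each NULL can be attributed to an unmatched t1 row.
    select a.key, c.key
    from t3 a left semi join t2 b on a.key = b.key
    right outer join t1 c on a.key = c.key
    sort by c.key;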
inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true + allNative: false usesVectorUDFAdaptor: false vectorized: true Map 4 Map Operator Tree: TableScan Vectorization: native: true - Select Vectorization: - className: VectorSelectOperator + Filter Vectorization: + className: VectorFilterOperator native: true - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - native: false - vectorProcessingMode: HASH - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -5419,6 +6588,9 @@ STAGE PLANS: usesVectorUDFAdaptor: false vectorized: true Reducer 2 + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -5438,13 +6610,13 @@ STAGE PLANS: Stage: Stage-0 Fetch Operator -PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -5472,12 +6644,23 @@ POSTHOOK: Input: default@t3 10 10 10 +10 +10 +10 +10 +10 +10 +10 +10 +2 4 4 +5 +5 +5 8 8 -NULL -NULL +9 NULL NULL NULL @@ -5499,17 +6682,37 @@ STAGE PLANS: Stage: Stage-1 Tez Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Map 1 <- Map 5 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS) Vertices: Map 1 Map Operator Tree: TableScan Vectorization: native: true - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Map Join Vectorization: 
+ bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -5518,25 +6721,28 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true + allNative: false usesVectorUDFAdaptor: false vectorized: true - Map 4 + Map 5 Map Operator Tree: TableScan Vectorization: native: true - Select Vectorization: - className: VectorSelectOperator + Filter Vectorization: + className: VectorFilterOperator native: true - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - native: false - vectorProcessingMode: HASH - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Select Vectorization: + className: VectorSelectOperator + native: true + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + native: false + vectorProcessingMode: HASH + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -5548,7 +6754,7 @@ STAGE PLANS: allNative: false usesVectorUDFAdaptor: false vectorized: true - Map 5 + Map 6 Map Operator Tree: TableScan Vectorization: native: true @@ -5556,6 +6762,10 @@ STAGE PLANS: className: VectorReduceSinkLongOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, 
spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -5567,7 +6777,6 @@ STAGE PLANS: allNative: true usesVectorUDFAdaptor: false vectorized: true - Reducer 2 Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -5583,64 +6792,33 @@ STAGE PLANS: File Sink Vectorization: className: VectorFileSinkOperator native: false + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + Reduce Operator Tree: + Select Vectorization: + className: VectorSelectOperator + native: true + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + Union 2 Stage: Stage-0 Fetch Operator -PREHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key -PREHOOK: type: QUERY -PREHOOK: Input: default@t1 -PREHOOK: Input: default@t2 -PREHOOK: Input: default@t3 -#### A masked pattern was here #### -POSTHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key -POSTHOOK: type: QUERY -POSTHOOK: Input: default@t1 -POSTHOOK: Input: default@t2 -POSTHOOK: Input: default@t3 -#### A masked pattern was here #### -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -0 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -10 -2 -4 -4 -5 -5 -5 -8 -8 -9 -NULL -NULL -NULL PREHOOK: query: explain vectorization only operator select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key PREHOOK: type: QUERY @@ -5670,11 +6848,13 @@ STAGE PLANS: className: VectorFilterOperator native: true Map Join Vectorization: + bigTableKeyExpressions: col 0:int className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false Map Join Vectorization: + bigTableKeyExpressions: col 1:string className: 
VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true @@ -5840,6 +7020,7 @@ STAGE PLANS: className: VectorSelectOperator native: true Map Join Vectorization: + bigTableKeyExpressions: col 1:string className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true @@ -5902,10 +7083,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t2 POSTHOOK: Input: default@t3 #### A masked pattern was here #### -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization detail debug select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization detail debug select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -5943,6 +7124,11 @@ STAGE PLANS: Map Join Operator condition map: Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[_col0]] keys: 0 key (type: int) 1 _col0 (type: int) @@ -5960,12 +7146,13 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + output key column names: KEY.reducesinkkey0, KEY.reducesinkkey1 Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:int, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: false Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -6022,13 +7209,14 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -6104,10 +7292,10 @@ POSTHOOK: Input: default@t2 10 val_10 4 val_4 8 val_8 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization detail debug select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail 
+POSTHOOK: query: explain vectorization detail debug select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -6145,6 +7333,11 @@ STAGE PLANS: Map Join Operator condition map: Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[_col0]] keys: 0 key (type: int) 1 _col0 (type: int) @@ -6162,12 +7355,13 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + output key column names: KEY.reducesinkkey0, KEY.reducesinkkey1 Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:int, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: false Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -6224,13 +7418,14 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -6308,10 +7503,10 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization detail debug select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization detail debug select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -6349,6 +7544,11 @@ STAGE PLANS: Map Join Operator condition map: Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[_col0]] keys: 0 key (type: int) 1 _col0 (type: int) @@ -6366,12 +7566,13 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + output key column names: KEY.reducesinkkey0, KEY.reducesinkkey1 Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:int, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: false Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -6428,13 +7629,14 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: true Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -6504,10 +7706,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t4 #### A masked pattern was here #### -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization detail debug select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization detail debug select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -6545,6 +7747,11 @@ STAGE PLANS: Map Join Operator condition map: Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[_col1]] keys: 0 key (type: int) 1 _col1 (type: int) @@ -6570,12 +7777,13 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + output key column names: KEY.reducesinkkey0 Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: false Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -6632,13 +7840,14 @@ STAGE PLANS: Reduce Output Operator key expressions: _col1 (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col1 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [1] + keyColumns: 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: true Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -6719,10 +7928,10 @@ val_5 val_5 val_8 val_9 -PREHOOK: query: explain vectorization 
detail +PREHOOK: query: explain vectorization detail debug select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization detail debug select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -6760,6 +7969,11 @@ STAGE PLANS: Map Join Operator condition map: Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[_col0]] keys: 0 key (type: int) 1 _col0 (type: int) @@ -6777,12 +7991,13 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + output key column names: KEY.reducesinkkey0, KEY.reducesinkkey1 Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:int, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: false Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -6839,13 +8054,14 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: true Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -6918,10 +8134,10 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 0 val_0 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization detail debug select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization detail debug select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -6979,13 +8195,14 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: true Statistics: Num rows: 
1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -7022,6 +8239,11 @@ STAGE PLANS: Map Join Operator condition map: Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[_col0]] keys: 0 key (type: int) 1 _col0 (type: int) @@ -7047,12 +8269,13 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + output key column names: KEY.reducesinkkey0 Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: false Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -7125,10 +8348,10 @@ POSTHOOK: Input: default@t3 val_10 val_8 val_9 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization detail debug select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization detail debug select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -7186,14 +8409,226 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: true + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 2 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 2068 Basic stats: 
COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[_col0]] + keys: + 0 key (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 1:string + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col1 + input vertices: + 1 Map 1 + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + output key column names: KEY.reducesinkkey0 + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +PREHOOK: query: explain vectorization detail debug +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail debug +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0:int, val 2) + predicate: (key > 2) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + skipTag: true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -7207,7 +8642,7 @@ STAGE PLANS: vectorized: true 
rowBatchContext: dataColumnCount: 2 - includeColumns: [0, 1] + includeColumns: [0] dataColumns: key:int, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] @@ -7229,38 +8664,36 @@ STAGE PLANS: Map Join Operator condition map: Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[_col0]] keys: 0 key (type: int) 1 _col0 (type: int) Map Join Vectorization: bigTableKeyExpressions: col 0:int - bigTableValueExpressions: col 1:string + bigTableValueExpressions: col 0:int, col 1:string className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false - outputColumnNames: _col1 + outputColumnNames: _col0, _col1 input vertices: 1 Map 1 Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + output key column names: KEY.reducesinkkey0, KEY.reducesinkkey1 + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int, 1:string native: true - projectedOutputColumnNums: [0] + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -7283,24 +8716,24 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:string + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 Select 
Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] + projectedOutputColumnNums: [0, 1] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -7319,21 +8752,26 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +PREHOOK: query: select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 #### A masked pattern was here #### -POSTHOOK: query: select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +POSTHOOK: query: select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 #### A masked pattern was here #### -PREHOOK: query: explain vectorization detail -select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +10 val_5 +10 val_5 +10 val_5 +4 val_2 +8 val_4 +PREHOOK: query: explain vectorization detail debug +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization detail debug +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -7348,14 +8786,79 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: t1 + alias: a + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[_col0]] + keys: + 0 key (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS 
false + outputColumnNames: _col0 + input vertices: + 1 Map 3 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 3 + Map Operator Tree: + TableScan + alias: b Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true @@ -7364,9 +8867,9 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterLongColGreaterLongScalar(col 0:int, val 2) - predicate: (key > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 @@ -7374,7 +8877,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator @@ -7386,18 +8889,19 @@ STAGE PLANS: keys: _col0 (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + skipTag: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -7415,7 +8919,100 @@ STAGE PLANS: dataColumns: key:int, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 2 + Reducer 2 + Execution mode: 
vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +0 +0 +0 +0 +0 +0 +10 +10 +10 +10 +2 +4 +4 +5 +5 +5 +8 +8 +9 +PREHOOK: query: explain vectorization detail debug +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail debug +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan alias: a @@ -7433,9 +9030,14 @@ STAGE PLANS: Map Join Operator condition map: Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [GenericUDFOPMultiply(Const int 2, Column[_col0])] keys: 0 key (type: int) - 1 _col0 (type: int) + 1 (2 * _col0) (type: int) Map Join Vectorization: bigTableKeyExpressions: col 0:int bigTableValueExpressions: col 0:int, col 1:string @@ -7445,17 +9047,18 @@ STAGE PLANS: nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false outputColumnNames: _col0, _col1 input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + output key column names: KEY.reducesinkkey0, KEY.reducesinkkey1 Reduce Sink Vectorization: 
className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:int, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: false Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -7470,11 +9073,76 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 2 - includeColumns: [0, 1] + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 3:int)(children: LongScalarMultiplyLongColumn(val 2, col 0:int) -> 3:int) + predicate: (2 * key) is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: (2 * _col0) (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: (2 * _col0) (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + keyExpressions: LongScalarMultiplyLongColumn(val 2, col 0:int) -> 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] dataColumns: key:int, value:string partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reducer 3 + scratchColumnTypeNames: [bigint] + Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -7515,26 +9183,25 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value 
+PREHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 #### A masked pattern was here #### -POSTHOOK: query: select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +POSTHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 #### A masked pattern was here #### -10 val_5 -10 val_5 -10 val_5 -4 val_2 -8 val_4 -PREHOOK: query: explain vectorization detail -select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +0 val_0 +0 val_0 +0 val_0 +8 val_8 +PREHOOK: query: explain vectorization detail debug +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +POSTHOOK: query: explain vectorization detail debug +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -7549,7 +9216,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -7557,7 +9224,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] @@ -7567,34 +9234,73 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) - Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [string], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[key]] keys: 0 key (type: int) - 1 _col0 (type: int) + 1 key (type: int) Map Join Vectorization: bigTableKeyExpressions: col 0:int - bigTableValueExpressions: col 0:int + bigTableValueExpressions: col 0:int, col 1:string className: VectorMapJoinOperator native: false nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false - outputColumnNames: _col0 + outputColumnNames: _col0, _col1, _col5, _col6 input vertices: 1 Map 3 - Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] - 
native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[_col5]] + 1 [Column[_col0]] + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 2:int + bigTableValueExpressions: col 0:int, col 1:string, col 2:int, col 3:string + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 4 Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + output key column names: KEY.reducesinkkey0, KEY.reducesinkkey1 + output value column names: VALUE._col0, VALUE._col1 + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int, 1:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 2:int, 3:string + skipTag: false + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -7608,15 +9314,15 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 2 - includeColumns: [0] + includeColumns: [0, 1] dataColumns: key:int, value:string partitionColumnCount: 0 - scratchColumnTypeNames: [] + scratchColumnTypeNames: [string] Map 3 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] @@ -7626,7 +9332,54 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column 
stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + output value column names: VALUE._col0 + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + skipTag: true + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 @@ -7634,7 +9387,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator @@ -7646,18 +9399,19 @@ STAGE PLANS: keys: _col0 (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + skipTag: true + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -7680,24 +9434,24 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + + 
reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:int + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string, VALUE._col0:int, VALUE._col1:string partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] + projectedOutputColumnNums: [0, 1, 2, 3] Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -7716,40 +9470,37 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +PREHOOK: query: select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value PREHOOK: type: QUERY PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +POSTHOOK: query: select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 POSTHOOK: Input: default@t3 #### A masked pattern was here #### -0 -0 -0 -0 -0 -0 -10 -10 -10 -10 -2 -4 -4 -5 -5 -5 -8 -8 -9 -PREHOOK: query: explain vectorization detail -select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +10 val_10 10 val_5 +10 val_10 10 val_5 +10 val_10 10 val_5 +4 val_4 4 val_2 +8 val_8 8 val_4 +PREHOOK: query: explain vectorization detail debug +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization detail debug +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -7772,7 +9523,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] @@ -7780,17 +9531,22 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 1:string)) + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 20 Data size: 3760 
Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Semi Join 0 to 1 + keyContext: [types [int, string], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key], Column[value]] + 1 [Column[_col0], Column[_col1]] keys: - 0 key (type: int) - 1 (2 * _col0) (type: int) + 0 key (type: int), value (type: string) + 1 _col0 (type: int), _col1 (type: string) Map Join Vectorization: - bigTableKeyExpressions: col 0:int + bigTableKeyExpressions: col 0:int, col 1:string bigTableValueExpressions: col 0:int, col 1:string className: VectorMapJoinOperator native: false @@ -7799,17 +9555,18 @@ STAGE PLANS: outputColumnNames: _col0, _col1 input vertices: 1 Map 3 - Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + output key column names: KEY.reducesinkkey0, KEY.reducesinkkey1 Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:int, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + skipTag: false + Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -7831,7 +9588,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] @@ -7839,41 +9596,41 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 3:int)(children: LongScalarMultiplyLongColumn(val 2, col 0:int) -> 3:int) - predicate: (2 * key) is not null (type: boolean) - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 1:string)) + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int) - outputColumnNames: _col0 + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int + keyExpressions: col 0:int, col 1:string native: false vectorProcessingMode: HASH projectedOutputColumnNums: [] - keys: 
_col0 (type: int) + keys: _col0 (type: int), _col1 (type: string) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: (2 * _col0) (type: int) - sort order: + - Map-reduce partition columns: (2 * _col0) (type: int) + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + output key column names: KEY.reducesinkkey0, KEY.reducesinkkey1 + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [1] - keyExpressions: LongScalarMultiplyLongColumn(val 2, col 0:int) -> 1:int + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:int, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + skipTag: true + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -7887,10 +9644,10 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 2 - includeColumns: [0] + includeColumns: [0, 1] dataColumns: key:int, value:string partitionColumnCount: 0 - scratchColumnTypeNames: [bigint] + scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -7914,13 +9671,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -7932,25 +9689,35 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +PREHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value PREHOOK: type: QUERY PREHOOK: Input: default@t1 -PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +POSTHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 -POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 #### A masked pattern was here #### 0 val_0 0 val_0 0 val_0 +0 val_0 +0 val_0 +0 val_0 +10 val_10 +2 val_2 +4 val_4 +5 val_5 +5 val_5 +5 val_5 8 val_8 -PREHOOK: query: explain vectorization detail -select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +9 val_9 +PREHOOK: query: explain 
vectorization detail debug +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +POSTHOOK: query: explain vectorization detail debug +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -7973,7 +9740,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] @@ -7983,46 +9750,62 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: - Inner Join 0 to 1 - Left Semi Join 1 to 2 + Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[_col0]] keys: 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) + 1 _col0 (type: int) Map Join Vectorization: bigTableKeyExpressions: col 0:int - bigTableValueExpressions: col 0:int, col 1:string + bigTableValueExpressions: col 0:int className: VectorMapJoinOperator native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false - outputColumnNames: _col0, _col1, _col5, _col6 + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0 input vertices: 1 Map 3 - 2 Map 4 - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0, 1, 2, 3] - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[_col0]] + 1 
[Column[_col0]] + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2, 3] - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: string) + skipTag: false + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -8036,15 +9819,15 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 2 - includeColumns: [0, 1] + includeColumns: [0] dataColumns: key:int, value:string partitionColumnCount: 0 - scratchColumnTypeNames: [string] + scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] @@ -8054,19 +9837,39 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] - Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + projectedOutputColumnNums: [0] + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:int + native: false + 
vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -8075,12 +9878,12 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true + allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 2 - includeColumns: [0, 1] + includeColumns: [0] dataColumns: key:int, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] @@ -8088,7 +9891,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: c - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] @@ -8098,7 +9901,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) - Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 @@ -8106,7 +9909,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator @@ -8118,18 +9921,19 @@ STAGE PLANS: keys: _col0 (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + skipTag: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -8152,31 +9956,31 @@ STAGE PLANS: Reduce Vectorization: enabled: true 
enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: aa - reduceColumnSortOrder: ++ + reduceColumnNullOrder: a + reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: - dataColumnCount: 4 - dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string, VALUE._col0:int, VALUE._col1:string + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1, 2, 3] - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumnNums: [0] + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -8188,37 +9992,37 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +PREHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +POSTHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 POSTHOOK: Input: default@t3 #### A masked pattern was here #### -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -10 val_10 10 val_5 -10 val_10 10 val_5 -10 val_10 10 val_5 -4 val_4 4 val_2 -8 val_8 8 val_4 -PREHOOK: query: explain vectorization detail -select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +0 +0 +0 +0 +0 +0 +10 +10 +10 +10 +4 +4 +8 +8 +PREHOOK: query: explain vectorization detail debug +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +POSTHOOK: query: explain vectorization detail debug +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -8233,7 +10037,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 
(BROADCAST_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -8241,44 +10045,110 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 1:string)) - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 20 Data size: 3760 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int), value (type: string) - 1 _col0 (type: int), _col1 (type: string) - Map Join Vectorization: - bigTableKeyExpressions: col 0:int, col 1:string - bigTableValueExpressions: col 0:int, col 1:string - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + outer filter mappings: [[1, 0], null] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[key]] + keys: + 0 key (type: int) + 1 key (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0, _col5 + input vertices: + 1 Map 3 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1:int) + predicate: _col5 is not null (type: boolean) + 
Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[_col5]] + 1 [Column[_col0]] + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 1:int + bigTableValueExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] + Reduce Output Operator + key expressions: key (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -8287,20 +10157,20 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 2 - includeColumns: [0, 1] + includeColumns: [0] dataColumns: key:int, value:string partitionColumnCount: 0 
scratchColumnTypeNames: [] - Map 3 + Map 4 Map Operator Tree: TableScan - alias: b - Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + alias: c + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] @@ -8308,40 +10178,41 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 1:string)) - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: int) + outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumnNums: [0] + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int, col 1:string + keyExpressions: col 0:int native: false vectorProcessingMode: HASH projectedOutputColumnNums: [] - keys: _col0 (type: int), _col1 (type: string) + keys: _col0 (type: int) mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1] + className: VectorReduceSinkLongOperator + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + skipTag: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -8355,7 +10226,7 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 2 - includeColumns: [0, 1] + includeColumns: [0] dataColumns: key:int, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] @@ -8364,31 +10235,31 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: aa - reduceColumnSortOrder: ++ + reduceColumnNullOrder: a + reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true 
rowBatchContext: - dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumnNums: [0] + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -8400,35 +10271,49 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +PREHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +POSTHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 POSTHOOK: Input: default@t3 #### A masked pattern was here #### -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -10 val_10 -2 val_2 -4 val_4 -5 val_5 -5 val_5 -5 val_5 -8 val_8 -9 val_9 -PREHOOK: query: explain vectorization detail -select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +10 +10 +10 +10 +4 +4 +8 +8 +PREHOOK: query: explain vectorization detail debug +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail debug +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -8443,55 +10328,30 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: 
native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Filter Operator - Filter Vectorization: - className: VectorFilterOperator + Reduce Output Operator + key expressions: key (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) - predicate: key is not null (type: boolean) - Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - Left Semi Join 0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - Map Join Vectorization: - bigTableKeyExpressions: col 0:int - bigTableValueExpressions: col 0:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false - outputColumnNames: _col0 - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -8500,7 +10360,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -8509,52 +10369,26 @@ STAGE PLANS: dataColumns: key:int, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Filter Operator - Filter Vectorization: - className: VectorFilterOperator + Reduce Output Operator + key expressions: key (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) - predicate: key is not null (type: boolean) - 
Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -8563,7 +10397,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -8572,7 +10406,7 @@ STAGE PLANS: dataColumns: key:int, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 4 + Map 5 Map Operator Tree: TableScan alias: c @@ -8610,13 +10444,14 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -8636,6 +10471,48 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + outer filter mappings: [[1, 0], [0, 0]] + keyExpressions: + 0 [Column[key]] + 1 [Column[key]] + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col5 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + 
Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[_col5]] + 1 [Column[_col0]] + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 5 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + skipTag: false + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -8658,13 +10535,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -8676,13 +10553,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +PREHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -8694,6 +10571,18 @@ POSTHOOK: Input: default@t3 0 0 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 10 10 10 @@ -8702,11 +10591,14 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain vectorization detail -select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +NULL +NULL +NULL +PREHOOK: query: explain vectorization detail debug +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail debug +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -8721,48 +10613,30 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- 
Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Map Join Operator - condition map: - Left Outer Join 0 to 1 - Left Semi Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - Map Join Vectorization: - bigTableKeyExpressions: col 0:int - bigTableValueExpressions: col 0:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, One MapJoin Condition IS false - outputColumnNames: _col0 - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -8771,7 +10645,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -8780,25 +10654,26 @@ STAGE PLANS: dataColumns: key:int, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Reduce Output Operator key expressions: key (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: key (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + skipTag: false + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -8816,7 +10691,7 @@ STAGE PLANS: dataColumns: key:int, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 4 + Map 5 Map Operator Tree: TableScan alias: c @@ -8824,37 +10699,45 @@ STAGE PLANS: TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - projectedOutputColumnNums: [0] + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: _col0 (type: int) - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -8873,6 +10756,48 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + 
outer filter mappings: [[1, 0], [0, 0]] + keyExpressions: + 0 [Column[key]] + 1 [Column[key]] + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col5 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[_col5]] + 1 [Column[_col0]] + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 5 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + skipTag: false + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -8895,13 +10820,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -8913,13 +10838,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -8951,11 +10876,14 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain vectorization detail -select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +NULL +NULL +NULL +PREHOOK: query: explain vectorization detail debug +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail debug +select a.key from t3 a left semi join t2 b on a.key = b.key 
left outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -8970,29 +10898,81 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[_col0]] + keys: + 0 key (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0 + input vertices: + 1 Map 3 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + outer filter mappings: [[1, 0], null] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[_col0]] + 1 [Column[key]] + keys: + 0 _col0 (type: int) + 1 key (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: 
hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -9001,7 +10981,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true + allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -9010,25 +10990,53 @@ STAGE PLANS: dataColumns: key:int, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 4 + Map 3 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for 
values IS true + skipTag: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -9037,7 +11045,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true + allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -9046,7 +11054,7 @@ STAGE PLANS: dataColumns: key:int, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 5 + Map 4 Map Operator Tree: TableScan alias: c @@ -9054,37 +11062,18 @@ STAGE PLANS: TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator + Reduce Output Operator + key expressions: key (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int native: true - projectedOutputColumnNums: [0] + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -9093,7 +11082,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -9103,23 +11092,6 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Outer Join 0 to 1 - Left Semi Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE - Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -9142,13 +11114,13 @@ STAGE PLANS: className: VectorSelectOperator 
native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -9160,13 +11132,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -9194,18 +11166,18 @@ POSTHOOK: Input: default@t3 10 10 10 +16 +18 +20 4 4 8 8 -NULL -NULL -NULL -PREHOOK: query: explain vectorization detail -select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization detail debug +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail debug +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -9235,13 +11207,14 @@ STAGE PLANS: Reduce Output Operator key expressions: key (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: key (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: false Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -9291,13 +11264,14 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: false Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -9327,13 +11301,14 @@ STAGE PLANS: Reduce Output Operator key expressions: key (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: key (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: false Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -9358,7 +11333,11 @@ STAGE PLANS: Merge Join Operator condition map: Left Semi Join 0 to 1 - Left Outer Join 0 to 2 + Right Outer Join 0 to 2 + keyExpressions: + 0 [Column[key]] + 1 [Column[_col0]] + 2 [Column[key]] keys: 0 key (type: int) 1 _col0 (type: int) @@ -9368,7 +11347,12 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + output key column names: KEY.reducesinkkey0 + skipTag: false Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -9410,13 +11394,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -9444,18 +11428,20 @@ POSTHOOK: Input: default@t3 10 10 10 -16 -18 -20 4 4 8 8 -PREHOOK: query: explain vectorization detail -select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +NULL +NULL +NULL +NULL +NULL +PREHOOK: query: explain vectorization detail debug +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail debug +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -9470,7 +11456,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Map 1 <- Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- 
Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -9482,17 +11469,47 @@ STAGE PLANS: TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[_col0]] + keys: + 0 key (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -9501,7 +11518,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true + allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -9518,37 +11535,45 @@ STAGE PLANS: TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - projectedOutputColumnNums: [0] + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: key is not null (type: boolean) 
Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: _col0 (type: int) - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -9577,13 +11602,14 @@ STAGE PLANS: Reduce Output Operator key expressions: key (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: key (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: false Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -9607,18 +11633,25 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 - Right Outer Join 0 to 2 + Full Outer Join 0 to 1 + outer filter mappings: [[1, 0], [0, 0]] + keyExpressions: + 0 [Column[_col0]] + 1 [Column[key]] keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) + 0 _col0 (type: int) + 1 key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: 
Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + output key column names: KEY.reducesinkkey0 + skipTag: false + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -9642,13 +11675,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -9660,13 +11693,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -9694,19 +11727,30 @@ POSTHOOK: Input: default@t3 10 10 10 +10 +10 +10 +10 +10 +10 +10 +10 +2 4 4 +5 +5 +5 8 8 +9 NULL NULL NULL -NULL -NULL -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization detail debug select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization detail debug select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -9722,7 +11766,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Map 1 <- Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -9734,17 +11779,47 @@ STAGE PLANS: TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values 
IS true - valueColumnNums: [] - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[_col0]] + keys: + 0 key (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 0:int + bigTableValueExpressions: col 0:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -9753,7 +11828,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true + allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -9770,37 +11845,45 @@ STAGE PLANS: TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - projectedOutputColumnNums: [0] + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: _col0 (type: int) - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: 
VectorReduceSinkLongOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -9829,13 +11912,14 @@ STAGE PLANS: Reduce Output Operator key expressions: key (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: key (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: false Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -9859,18 +11943,25 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 - Outer Join 0 to 2 + Full Outer Join 0 to 1 + outer filter mappings: [[1, 0], [0, 0]] + keyExpressions: + 0 [Column[_col0]] + 1 [Column[key]] keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) + 0 _col0 (type: int) + 1 key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + output key column names: KEY.reducesinkkey0 + skipTag: false + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -9894,13 +11985,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: 
VectorFileSinkOperator native: false - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -9966,10 +12057,10 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization detail debug select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization detail debug select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -10007,6 +12098,11 @@ STAGE PLANS: Map Join Operator condition map: Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[_col0]] keys: 0 key (type: int) 1 _col0 (type: int) @@ -10024,6 +12120,12 @@ STAGE PLANS: Map Join Operator condition map: Left Outer Join 0 to 1 + keyContext: [types [string], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + outer filter mappings: [[1, 0], null] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[_col1]] + 1 [Column[value]] keys: 0 _col1 (type: string) 1 value (type: string) @@ -10041,12 +12143,13 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: false Statistics: Num rows: 25 Data size: 4776 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -10103,13 +12206,14 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -10139,13 +12243,14 @@ STAGE PLANS: Reduce Output Operator key expressions: value (type: string) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: value (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - 
keyColumnNums: [1] + keyColumns: 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: true Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -10252,10 +12357,10 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization detail debug select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization detail debug select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -10300,6 +12405,11 @@ STAGE PLANS: Map Join Operator condition map: Left Semi Join 0 to 1 + keyContext: [types [string], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[_col1]] + 1 [Column[_col0]] keys: 0 _col1 (type: string) 1 _col0 (type: string) @@ -10379,13 +12489,14 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: true Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -10421,10 +12532,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t2 POSTHOOK: Input: default@t3 #### A masked pattern was here #### -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization detail debug select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization detail debug select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -10462,17 +12573,24 @@ STAGE PLANS: Map Join Operator condition map: Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[_col0]] keys: 0 key (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:string className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int, 1:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ -10480,12 +12598,13 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + output key column names: KEY.reducesinkkey0, KEY.reducesinkkey1 Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:int, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: false Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -10542,13 +12661,14 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -10624,10 +12744,10 @@ POSTHOOK: Input: default@t2 10 val_10 4 val_4 8 val_8 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization detail debug select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization detail debug select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -10665,17 +12785,24 @@ STAGE PLANS: Map Join Operator condition map: Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[_col0]] keys: 0 key (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:string className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int, 1:string + hashTableImplementationType: 
OPTIMIZED outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ -10683,12 +12810,13 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + output key column names: KEY.reducesinkkey0, KEY.reducesinkkey1 Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:int, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: false Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -10745,13 +12873,14 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -10829,10 +12958,10 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization detail debug select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization detail debug select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -10870,17 +12999,24 @@ STAGE PLANS: Map Join Operator condition map: Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[_col0]] keys: 0 key (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:string className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int, 1:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ -10888,12 +13024,13 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + output key column names: KEY.reducesinkkey0, KEY.reducesinkkey1 Reduce Sink Vectorization: className: 
VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:int, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: false Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -10950,13 +13087,14 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: true Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -11024,13 +13162,238 @@ PREHOOK: Input: default@t4 POSTHOOK: query: select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 -POSTHOOK: Input: default@t4 +POSTHOOK: Input: default@t4 +#### A masked pattern was here #### +PREHOOK: query: explain vectorization detail debug +select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail debug +select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[_col1]] + keys: + 0 key (type: int) + 1 _col1 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [1] + bigTableValueColumns: 1:string + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + projectedOutput: 1:string + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col1 + input vertices: + 1 Map 3 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [1] + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + output key column names: KEY.reducesinkkey0 + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 1:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColLessLongScalar(col 0:int, val 15) + predicate: (key < 15) (type: boolean) + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col1 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:int, col 0:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: _col1 (type: int), _col1 (type: int) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: true + 
Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t3 #### A masked pattern was here #### -PREHOOK: query: explain vectorization detail -select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +val_0 +val_0 +val_0 +val_10 +val_2 +val_4 +val_5 +val_5 +val_5 +val_8 +val_9 +PREHOOK: query: explain vectorization detail debug +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +POSTHOOK: query: explain vectorization detail debug +select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -11067,39 +13430,39 @@ STAGE PLANS: Map Join Operator condition map: Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[_col0]] keys: 0 key (type: int) - 1 _col1 (type: int) + 1 _col0 (type: int) Map Join Vectorization: - 
bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [1] - bigTableValueColumnNums: [1] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:string className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [1] - outputColumnNames: _col1 + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int, 1:string + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1 input vertices: 1 Map 3 - Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + output key column names: KEY.reducesinkkey0, KEY.reducesinkkey1 + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int, 1:string native: true - projectedOutputColumnNums: [1] - Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -11121,7 +13484,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] @@ -11129,40 +13492,41 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterLongColLessLongScalar(col 0:int, val 15) - predicate: (key < 15) (type: boolean) - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + predicateExpression: FilterExprAndExpr(children: FilterStringGroupColLessStringScalar(col 1:string, val val_10), SelectColumnIsNotNull(col 0:int)) + predicate: ((value < 'val_10') and key is not null) (type: boolean) + Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int) - outputColumnNames: _col1 + expressions: key (type: int), value (type: string) + 
outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int, col 0:int + keyExpressions: col 0:int, col 1:string native: false vectorProcessingMode: HASH projectedOutputColumnNums: [] - keys: _col1 (type: int), _col1 (type: int) + keys: _col0 (type: int), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) + key expressions: _col0 (type: int) sort order: + - Map-reduce partition columns: _col1 (type: int) + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [1] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + skipTag: true + Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -11176,7 +13540,7 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 2 - includeColumns: [0] + includeColumns: [0, 1] dataColumns: key:int, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] @@ -11185,31 +13549,31 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:string + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -11221,32 
+13585,24 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +PREHOOK: query: select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value PREHOOK: type: QUERY PREHOOK: Input: default@t1 -PREHOOK: Input: default@t3 +PREHOOK: Input: default@t2 #### A masked pattern was here #### -POSTHOOK: query: select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value +POSTHOOK: query: select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 -POSTHOOK: Input: default@t3 +POSTHOOK: Input: default@t2 #### A masked pattern was here #### -val_0 -val_0 -val_0 -val_10 -val_2 -val_4 -val_5 -val_5 -val_5 -val_8 -val_9 -PREHOOK: query: explain vectorization detail -select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value +0 val_0 +0 val_0 +0 val_0 +PREHOOK: query: explain vectorization detail debug +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value +POSTHOOK: query: explain vectorization detail debug +select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -11261,13 +13617,77 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan + alias: t3 + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0:int, val 5) + predicate: (key > 5) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS 
true + skipTag: true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 2 + Map Operator Tree: + TableScan alias: a Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: @@ -11283,31 +13703,47 @@ STAGE PLANS: Map Join Operator condition map: Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[_col0]] keys: 0 key (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [1] + bigTableValueColumns: 1:string className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1] - outputColumnNames: _col0, _col1 + nonOuterSmallTableKeyMapping: [] + projectedOutput: 1:string + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col1 input vertices: - 1 Map 3 + 1 Map 1 Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + projectedOutputColumnNums: [1] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + output key column names: KEY.reducesinkkey0 + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 1:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -11325,10 +13761,87 @@ STAGE PLANS: 
dataColumns: key:int, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 3 + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t3 +#### A masked pattern was here #### +val_10 +val_8 +val_9 +PREHOOK: query: explain vectorization detail debug +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail debug +select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan - alias: b + alias: t2 Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true @@ -11337,9 +13850,9 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: FilterStringGroupColLessStringScalar(col 1:string, val val_10), SelectColumnIsNotNull(col 0:int)) - predicate: ((value < 'val_10') and key is not null) (type: boolean) - Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE + predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 0:int, val 5), FilterStringGroupColLessEqualStringScalar(col 1:string, val val_20)) + predicate: ((key > 5) and (value <= 'val_20')) (type: boolean) + Statistics: Num rows: 1 Data 
size: 188 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int), value (type: string) outputColumnNames: _col0, _col1 @@ -11347,7 +13860,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator @@ -11359,18 +13872,19 @@ STAGE PLANS: keys: _col0 (type: int), _col1 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE + skipTag: true + Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -11388,29 +13902,105 @@ STAGE PLANS: dataColumns: key:int, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] - Reducer 2 + Map 2 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[_col0]] + keys: + 0 key (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [1] + bigTableValueColumns: 1:string + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + projectedOutput: 1:string + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col1 + input vertices: + 1 Map 1 + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col1 (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: 
true + projectedOutputColumnNums: [1] + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + output key column names: KEY.reducesinkkey0 + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 1:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: aa - reduceColumnSortOrder: ++ + reduceColumnNullOrder: a + reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: - dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:string partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] + projectedOutputColumnNums: [0] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -11429,24 +14019,21 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value +PREHOOK: query: select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 #### A masked pattern was here #### -POSTHOOK: query: select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value +POSTHOOK: query: select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 #### A masked pattern was here #### -0 val_0 -0 val_0 -0 val_0 -PREHOOK: query: explain vectorization detail -select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value +PREHOOK: query: explain vectorization detail debug +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: 
query: explain vectorization detail -select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value +POSTHOOK: query: explain vectorization detail debug +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -11468,8 +14055,8 @@ STAGE PLANS: Map 1 Map Operator Tree: TableScan - alias: t3 - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + alias: t1 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] @@ -11477,8 +14064,8 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterLongColGreaterLongScalar(col 0:int, val 5) - predicate: (key > 5) (type: boolean) + predicateExpression: FilterLongColGreaterLongScalar(col 0:int, val 2) + predicate: (key > 2) (type: boolean) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) @@ -11503,13 +14090,14 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: true Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -11546,39 +14134,39 @@ STAGE PLANS: Map Join Operator condition map: Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[_col0]] keys: 0 key (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [1] - bigTableValueColumnNums: [1] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:string className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [1] - outputColumnNames: _col1 + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int, 1:string + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1 input vertices: 1 Map 1 Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + output key column names: KEY.reducesinkkey0, 
KEY.reducesinkkey1 + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int, 1:string native: true - projectedOutputColumnNums: [1] + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -11601,24 +14189,24 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:string + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] + projectedOutputColumnNums: [0, 1] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -11637,24 +14225,26 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value +PREHOOK: query: select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value PREHOOK: type: QUERY PREHOOK: Input: default@t1 -PREHOOK: Input: default@t3 +PREHOOK: Input: default@t2 #### A masked pattern was here #### -POSTHOOK: query: select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value +POSTHOOK: query: select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 -POSTHOOK: Input: default@t3 +POSTHOOK: Input: default@t2 #### A masked pattern was here #### -val_10 -val_8 -val_9 -PREHOOK: query: explain vectorization detail -select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +10 val_5 +10 val_5 +10 val_5 +4 val_2 +8 val_4 +PREHOOK: query: explain vectorization detail debug +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain 
vectorization detail -select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +POSTHOOK: query: explain vectorization detail debug +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -11669,15 +14259,15 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: t2 - Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + alias: a + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] @@ -11685,40 +14275,45 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 0:int, val 5), FilterStringGroupColLessEqualStringScalar(col 1:string, val val_20)) - predicate: ((key > 5) and (value <= 'val_20')) (type: boolean) - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 - Select Vectorization: - className: VectorSelectOperator + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[_col0]] + keys: + 0 key (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int + className: VectorMapJoinLeftSemiLongOperator native: true - projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:int, col 1:string - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: _col0 (type: int), _col1 (type: string) - mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0 + input vertices: + 1 Map 3 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -11727,20 +14322,20 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 2 - includeColumns: [0, 1] + includeColumns: [0] dataColumns: key:int, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 2 + Map 3 Map Operator Tree: TableScan - alias: a - Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + alias: b + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] @@ -11750,43 +14345,39 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int) - 1 _col0 (type: int) - Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [1] - bigTableValueColumnNums: [1] - className: VectorMapJoinLeftSemiLongOperator + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [1] - outputColumnNames: _col1 - input vertices: - 1 Map 1 - Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col1 (type: string) + projectedOutputColumnNums: [0] + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: _col0 (type: int) + mode: hash outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - 
native: true - projectedOutputColumnNums: [1] - Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: string) + key expressions: _col0 (type: int) sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1] + className: VectorReduceSinkLongOperator + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + skipTag: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -11795,16 +14386,16 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true + allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 2 - includeColumns: [0, 1] + includeColumns: [0] dataColumns: key:int, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] - Reducer 3 + Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -11816,24 +14407,24 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:string + dataColumns: KEY.reducesinkkey0:int partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: string) + expressions: KEY.reducesinkkey0 (type: int) outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -11845,21 +14436,40 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +PREHOOK: query: select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 -PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +POSTHOOK: query: select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 -POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 #### A masked 
pattern was here #### -PREHOOK: query: explain vectorization detail -select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +0 +0 +0 +0 +0 +0 +10 +10 +10 +10 +2 +4 +4 +5 +5 +5 +8 +8 +9 +PREHOOK: query: explain vectorization detail debug +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization detail debug +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -11874,76 +14484,13 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: t1 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - TableScan Vectorization: - native: true - vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Filter Operator - Filter Vectorization: - className: VectorFilterOperator - native: true - predicateExpression: FilterLongColGreaterLongScalar(col 0:int, val 2) - predicate: (key > 2) (type: boolean) - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE - Execution mode: vectorized, llap - LLAP IO: all inputs - Map Vectorization: - enabled: true - enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true - inputFormatFeatureSupport: [] - featureSupportInUse: [] - inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false - usesVectorUDFAdaptor: false - vectorized: true - rowBatchContext: - dataColumnCount: 2 - includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Map 2 - Map Operator Tree: - TableScan alias: a Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: @@ -11959,30 +14506,38 @@ STAGE PLANS: Map Join Operator condition map: Left Semi Join 0 to 1 + keyContext: 
[types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [GenericUDFOPMultiply(Const int 2, Column[_col0])] keys: 0 key (type: int) - 1 _col0 (type: int) + 1 (2 * _col0) (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:string className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int, 1:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1 input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + output key column names: KEY.reducesinkkey0, KEY.reducesinkkey1 Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:int, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: false Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -12001,7 +14556,72 @@ STAGE PLANS: dataColumns: key:int, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] - Reducer 3 + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 3:int)(children: LongScalarMultiplyLongColumn(val 2, col 0:int) -> 3:int) + predicate: (2 * key) is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: (2 * _col0) (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: (2 * _col0) (type: int) + Reduce Sink 
Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + keyExpressions: LongScalarMultiplyLongColumn(val 2, col 0:int) -> 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -12042,26 +14662,25 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +PREHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 #### A masked pattern was here #### -POSTHOOK: query: select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +POSTHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 #### A masked pattern was here #### -10 val_5 -10 val_5 -10 val_5 -4 val_2 -8 val_4 -PREHOOK: query: explain vectorization detail -select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +0 val_0 +0 val_0 +0 val_0 +8 val_8 +PREHOOK: query: explain vectorization detail debug +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +POSTHOOK: query: explain vectorization detail debug +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -12076,7 +14695,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -12084,7 +14703,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] @@ -12094,35 +14713,80 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) - Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column 
stats: NONE Map Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [string], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[key]] keys: 0 key (type: int) - 1 _col0 (type: int) + 1 key (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0] - bigTableValueColumnNums: [0] - className: VectorMapJoinLeftSemiLongOperator + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:string + className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0] - outputColumnNames: _col0 + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int, 1:string, 0:int, 3:string + smallTableValueMapping: 3:string + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col5, _col6 input vertices: 1 Map 3 - Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[_col5]] + 1 [Column[_col0]] + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1, 3] + bigTableValueColumns: 0:int, 1:string, 0:int, 3:string + className: VectorMapJoinLeftSemiLongOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int, 1:string, 0:int, 3:string + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 4 Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 0, 3] + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Reduce 
Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + output key column names: KEY.reducesinkkey0, KEY.reducesinkkey1 + output value column names: VALUE._col0, VALUE._col1 + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int, 1:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 3:string + skipTag: false + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -12136,15 +14800,15 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 2 - includeColumns: [0] + includeColumns: [0, 1] dataColumns: key:int, value:string partitionColumnCount: 0 - scratchColumnTypeNames: [] + scratchColumnTypeNames: [string] Map 3 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] @@ -12154,7 +14818,54 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + output value column names: VALUE._col0 + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + skipTag: true + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Select Operator 
expressions: key (type: int) outputColumnNames: _col0 @@ -12162,7 +14873,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator @@ -12174,18 +14885,19 @@ STAGE PLANS: keys: _col0 (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + skipTag: true + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -12208,24 +14920,24 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:int + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string, VALUE._col0:int, VALUE._col1:string partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] + projectedOutputColumnNums: [0, 1, 2, 3] Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -12244,40 +14956,37 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +PREHOOK: query: select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value PREHOOK: type: QUERY PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +POSTHOOK: query: select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 POSTHOOK: Input: default@t3 #### A masked pattern was here #### -0 -0 -0 -0 -0 -0 -10 -10 -10 -10 -2 -4 -4 -5 -5 -5 -8 -8 -9 -PREHOOK: query: explain 
vectorization detail -select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +10 val_10 10 val_5 +10 val_10 10 val_5 +10 val_10 10 val_5 +4 val_4 4 val_2 +8 val_8 8 val_4 +PREHOOK: query: explain vectorization detail debug +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization detail debug +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -12300,7 +15009,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] @@ -12308,37 +15017,45 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 1:string)) + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 20 Data size: 3760 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Semi Join 0 to 1 + keyContext: [types [int, string], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key], Column[value]] + 1 [Column[_col0], Column[_col1]] keys: - 0 key (type: int) - 1 (2 * _col0) (type: int) + 0 key (type: int), value (type: string) + 1 _col0 (type: int), _col1 (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] - className: VectorMapJoinLeftSemiLongOperator + bigTableKeyColumns: 0:int, 1:string + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:string + className: VectorMapJoinLeftSemiMultiKeyOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int, 1:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1 input vertices: 1 Map 3 - Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + output key column names: KEY.reducesinkkey0, KEY.reducesinkkey1 Reduce Sink Vectorization: className: 
VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:int, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + skipTag: false + Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -12360,7 +15077,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] @@ -12368,41 +15085,41 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 3:int)(children: LongScalarMultiplyLongColumn(val 2, col 0:int) -> 3:int) - predicate: (2 * key) is not null (type: boolean) - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 1:string)) + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int) - outputColumnNames: _col0 + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int + keyExpressions: col 0:int, col 1:string native: false vectorProcessingMode: HASH projectedOutputColumnNums: [] - keys: _col0 (type: int) + keys: _col0 (type: int), _col1 (type: string) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: (2 * _col0) (type: int) - sort order: + - Map-reduce partition columns: (2 * _col0) (type: int) + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + output key column names: KEY.reducesinkkey0, KEY.reducesinkkey1 + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [1] - keyExpressions: LongScalarMultiplyLongColumn(val 2, col 0:int) -> 1:int + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:int, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 11 Data size: 44 
Basic stats: COMPLETE Column stats: NONE + skipTag: true + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -12416,10 +15133,10 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 2 - includeColumns: [0] + includeColumns: [0, 1] dataColumns: key:int, value:string partitionColumnCount: 0 - scratchColumnTypeNames: [bigint] + scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -12443,13 +15160,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -12461,25 +15178,35 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +PREHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value PREHOOK: type: QUERY PREHOOK: Input: default@t1 -PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +POSTHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 -POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 #### A masked pattern was here #### 0 val_0 0 val_0 0 val_0 +0 val_0 +0 val_0 +0 val_0 +10 val_10 +2 val_2 +4 val_4 +5 val_5 +5 val_5 +5 val_5 8 val_8 -PREHOOK: query: explain vectorization detail -select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +9 val_9 +PREHOOK: query: explain vectorization detail debug +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +POSTHOOK: query: explain vectorization detail debug +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -12502,7 +15229,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] @@ -12512,46 +15239,68 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE 
Column stats: NONE Map Join Operator condition map: - Inner Join 0 to 1 - Left Semi Join 1 to 2 + Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[_col0]] keys: 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) + 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyExpressions: col 0:int - bigTableValueExpressions: col 0:int, col 1:string - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: One MapJoin Condition IS false - outputColumnNames: _col0, _col1, _col5, _col6 + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0 input vertices: 1 Map 3 - 2 Map 4 - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Vectorization: - className: VectorSelectOperator + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[_col0]] + 1 [Column[_col0]] + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int + className: VectorMapJoinLeftSemiLongOperator native: true - projectedOutputColumnNums: [0, 1, 2, 3] - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 Reduce Sink Vectorization: className: 
VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2, 3] - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: string) + skipTag: false + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -12560,20 +15309,20 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 2 - includeColumns: [0, 1] + includeColumns: [0] dataColumns: key:int, value:string partitionColumnCount: 0 - scratchColumnTypeNames: [string] + scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] @@ -12583,19 +15332,39 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] - Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + projectedOutputColumnNums: [0] + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: 
true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -12604,12 +15373,12 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true + allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 2 - includeColumns: [0, 1] + includeColumns: [0] dataColumns: key:int, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] @@ -12617,7 +15386,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: c - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] @@ -12627,7 +15396,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) - Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 @@ -12635,7 +15404,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator @@ -12647,18 +15416,19 @@ STAGE PLANS: keys: _col0 (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + skipTag: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -12681,31 +15451,31 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: aa - reduceColumnSortOrder: ++ + reduceColumnNullOrder: a + reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: - dataColumnCount: 4 - dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string, VALUE._col0:int, VALUE._col1:string + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1, 
_col2, _col3 + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1, 2, 3] - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumnNums: [0] + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -12717,37 +15487,37 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +PREHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +POSTHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 POSTHOOK: Input: default@t3 #### A masked pattern was here #### -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -10 val_10 10 val_5 -10 val_10 10 val_5 -10 val_10 10 val_5 -4 val_4 4 val_2 -8 val_8 8 val_4 -PREHOOK: query: explain vectorization detail -select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +0 +0 +0 +0 +0 +0 +10 +10 +10 +10 +4 +4 +8 +8 +PREHOOK: query: explain vectorization detail debug +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +POSTHOOK: query: explain vectorization detail debug +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -12762,7 +15532,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -12770,45 +15540,79 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Filter Operator - Filter Vectorization: - className: VectorFilterOperator + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, 
hasFilter=false] + outer filter mappings: [[1, 0], null] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[key]] + keys: + 0 key (type: int) + 1 key (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int + className: VectorMapJoinOuterLongOperator native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 1:string)) - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 20 Data size: 3760 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int), value (type: string) - 1 _col0 (type: int), _col1 (type: string) - Map Join Vectorization: - bigTableKeyColumnNums: [0, 1] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] - className: VectorMapJoinLeftSemiMultiKeyOperator + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outerSmallTableKeyMapping: 0 -> 3 + projectedOutput: 0:int, 3:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col5 + input vertices: + 1 Map 3 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1] - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + predicateExpression: SelectColumnIsNotNull(col 3:int) + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[_col5]] + 1 [Column[_col0]] + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 3:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int + className: VectorMapJoinLeftSemiLongOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE + 
nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -12822,15 +15626,52 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 2 - includeColumns: [0, 1] + includeColumns: [0] dataColumns: key:int, value:string partitionColumnCount: 0 - scratchColumnTypeNames: [] + scratchColumnTypeNames: [bigint] Map 3 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] + Reduce Output Operator + key expressions: key (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] @@ -12838,40 +15679,41 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 1:string)) - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE 
Column stats: NONE + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 + expressions: key (type: int) + outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumnNums: [0] + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int, col 1:string + keyExpressions: col 0:int native: false vectorProcessingMode: HASH projectedOutputColumnNums: [] - keys: _col0 (type: int), _col1 (type: string) + keys: _col0 (type: int) mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1] + className: VectorReduceSinkLongOperator + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + skipTag: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -12885,7 +15727,7 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 2 - includeColumns: [0, 1] + includeColumns: [0] dataColumns: key:int, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] @@ -12893,32 +15735,32 @@ STAGE PLANS: Execution mode: vectorized, llap Reduce Vectorization: enabled: true - enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: aa - reduceColumnSortOrder: ++ + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: - dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 22 Data 
size: 4136 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumnNums: [0] + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -12930,35 +15772,49 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +PREHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +POSTHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 POSTHOOK: Input: default@t3 #### A masked pattern was here #### -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -10 val_10 -2 val_2 -4 val_4 -5 val_5 -5 val_5 -5 val_5 -8 val_8 -9 val_9 -PREHOOK: query: explain vectorization detail -select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +10 +10 +10 +10 +4 +4 +8 +8 +PREHOOK: query: explain vectorization detail debug +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail debug +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -12973,55 +15829,30 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Filter Operator - Filter Vectorization: - className: VectorFilterOperator + Reduce Output Operator + key expressions: key (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) - predicate: key is not null (type: boolean) - Statistics: Num rows: 21 Data 
size: 84 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - Left Semi Join 0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - Map Join Vectorization: - bigTableKeyExpressions: col 0:int - bigTableValueExpressions: col 0:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: One MapJoin Condition IS false - outputColumnNames: _col0 - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -13030,7 +15861,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -13039,52 +15870,26 @@ STAGE PLANS: dataColumns: key:int, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Filter Operator - Filter Vectorization: - className: VectorFilterOperator + Reduce Output Operator + key expressions: key (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: _col0 (type: int) 
- mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -13093,7 +15898,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -13102,7 +15907,7 @@ STAGE PLANS: dataColumns: key:int, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 4 + Map 5 Map Operator Tree: TableScan alias: c @@ -13140,13 +15945,14 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -13166,6 +15972,48 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + outer filter mappings: [[1, 0], [0, 0]] + keyExpressions: + 0 [Column[key]] + 1 [Column[key]] + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col5 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[_col5]] + 1 [Column[_col0]] + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 5 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort 
order: + + output key column names: KEY.reducesinkkey0 + skipTag: false + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -13188,13 +16036,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -13206,13 +16054,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +PREHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -13224,6 +16072,18 @@ POSTHOOK: Input: default@t3 0 0 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 10 10 10 @@ -13232,11 +16092,14 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain vectorization detail -select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +NULL +NULL +NULL +PREHOOK: query: explain vectorization detail debug +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail debug +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -13251,48 +16114,77 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Map 1 <- Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Map 7 (SIMPLE_EDGE), Union 2 (SIMPLE_EDGE) + Reducer 4 <- Reducer 3 (SIMPLE_EDGE) + Reducer 5 <- Map 1 (SIMPLE_EDGE), Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 
2:ROW__ID:struct] Map Join Operator condition map: - Left Outer Join 0 to 1 - Left Semi Join 1 to 2 + Full Outer Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[key]] keys: 0 key (type: int) 1 key (type: int) - 2 _col0 (type: int) Map Join Vectorization: - bigTableKeyExpressions: col 0:int - bigTableValueExpressions: col 0:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: One MapJoin Condition IS false - outputColumnNames: _col0 + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int + className: VectorMapJoinFullOuterLongOperator + fullOuterSmallTableKeyMapping: 0 -> 3 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:int, 3:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col5 input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + 1 Map 6 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 3:int) + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col5 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + output value column names: VALUE._col0 + Map-reduce partition columns: _col5 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 3:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int + skipTag: false + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) Reduce Output Operator key expressions: _col0 (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + skipTag: false Execution mode: vectorized, llap LLAP IO: all inputs Map 
Vectorization: @@ -13301,7 +16193,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -13309,26 +16201,38 @@ STAGE PLANS: includeColumns: [0] dataColumns: key:int, value:string partitionColumnCount: 0 - scratchColumnTypeNames: [] - Map 3 + scratchColumnTypeNames: [bigint] + Map 6 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Reduce Output Operator key expressions: key (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: key (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + skipTag: true + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: true + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -13346,7 +16250,7 @@ STAGE PLANS: dataColumns: key:int, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 4 + Map 7 Map Operator Tree: TableScan alias: c @@ -13354,37 +16258,45 @@ STAGE PLANS: TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: _col0 (type: int) - mode: hash + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - 
Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -13402,7 +16314,30 @@ STAGE PLANS: dataColumns: key:int, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] - Reducer 2 + Reducer 3 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Left Semi Join 0 to 1 + keyExpressions: + 0 [Column[_col5]] + 1 [Column[_col0]] + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + skipTag: false + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -13425,17 +16360,90 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 5 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + 
rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: key + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[key]] + keys: + 0 key (type: int) + 1 key (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int + className: VectorMapJoinFullOuterIntersectLongOperator + fullOuterSmallTableKeyMapping: 0 -> 1 + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:int, 1:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col5 + input vertices: + 1 Map 6 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + fullOuterIntersect: true + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 1:int) + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col5 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + output value column names: VALUE._col0 + Map-reduce partition columns: _col5 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int + skipTag: false + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int) + Union 2 + Vertex: Union 2 Stage: Stage-0 Fetch Operator @@ -13443,13 +16451,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -13481,11 +16489,14 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain vectorization detail -select a.key from t1 a 
full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +NULL +NULL +NULL +PREHOOK: query: explain vectorization detail debug +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail debug +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -13500,29 +16511,86 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[_col0]] + keys: + 0 key (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0 + input vertices: + 1 Map 3 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + outer filter mappings: [[1, 0], null] + valueContexts: [1:[types [], 
serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[_col0]] + 1 [Column[key]] + keys: + 0 _col0 (type: int) + 1 key (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -13540,25 +16608,53 @@ STAGE PLANS: dataColumns: key:int, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 4 + Map 3 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort 
order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -13567,7 +16663,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true + allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -13576,7 +16672,7 @@ STAGE PLANS: dataColumns: key:int, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 5 + Map 4 Map Operator Tree: TableScan alias: c @@ -13584,37 +16680,18 @@ STAGE PLANS: TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator + Reduce Output Operator + key expressions: key (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int native: true - projectedOutputColumnNums: [0] + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -13623,33 +16700,16 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 2 includeColumns: [0] - dataColumns: key:int, value:string - partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Outer Join 0 to 1 - Left Semi 
Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE - Reducer 3 + dataColumns: key:int, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -13672,13 +16732,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -13690,13 +16750,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -13724,18 +16784,18 @@ POSTHOOK: Input: default@t3 10 10 10 +16 +18 +20 4 4 8 8 -NULL -NULL -NULL -PREHOOK: query: explain vectorization detail -select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization detail debug +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail debug +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -13765,13 +16825,14 @@ STAGE PLANS: Reduce Output Operator key expressions: key (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: key (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: false Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column 
stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -13821,13 +16882,14 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: false Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -13857,13 +16919,14 @@ STAGE PLANS: Reduce Output Operator key expressions: key (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: key (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: false Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -13888,7 +16951,11 @@ STAGE PLANS: Merge Join Operator condition map: Left Semi Join 0 to 1 - Left Outer Join 0 to 2 + Right Outer Join 0 to 2 + keyExpressions: + 0 [Column[key]] + 1 [Column[_col0]] + 2 [Column[key]] keys: 0 key (type: int) 1 _col0 (type: int) @@ -13898,7 +16965,12 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + output key column names: KEY.reducesinkkey0 + skipTag: false Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -13940,13 +17012,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -13974,18 +17046,20 @@ POSTHOOK: Input: default@t3 10 10 10 -16 -18 -20 4 4 8 8 -PREHOOK: query: explain vectorization detail -select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +NULL +NULL +NULL +NULL +NULL +PREHOOK: query: explain vectorization detail debug +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain 
vectorization detail -select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail debug +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -14000,7 +17074,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Map 1 <- Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -14012,17 +17087,50 @@ STAGE PLANS: TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[_col0]] + keys: + 0 key (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -14048,37 +17156,45 @@ STAGE PLANS: 
TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - projectedOutputColumnNums: [0] + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: _col0 (type: int) - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -14107,13 +17223,14 @@ STAGE PLANS: Reduce Output Operator key expressions: key (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: key (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: false Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -14137,18 +17254,25 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 - Right Outer Join 0 to 2 + Full Outer Join 0 to 1 + outer 
filter mappings: [[1, 0], [0, 0]] + keyExpressions: + 0 [Column[_col0]] + 1 [Column[key]] keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) + 0 _col0 (type: int) + 1 key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + output key column names: KEY.reducesinkkey0 + skipTag: false + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -14172,13 +17296,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -14190,13 +17314,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -14224,19 +17348,30 @@ POSTHOOK: Input: default@t3 10 10 10 +10 +10 +10 +10 +10 +10 +10 +10 +2 4 4 +5 +5 +5 8 8 +9 NULL NULL NULL -NULL -NULL -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization detail debug select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization detail debug select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -14252,8 +17387,9 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Map 1 <- Map 5 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS) + Reducer 3 <- Union 2 (SIMPLE_EDGE) + Reducer 4 <- Map 1 (SIMPLE_EDGE), Map 6 (BROADCAST_EDGE), Union 2 (CONTAINS) #### A masked pattern was here #### Vertices: Map 1 @@ -14264,17 +17400,83 @@ STAGE PLANS: TableScan Vectorization: native: true vectorizationSchemaColumns: 
[0:key:int, 1:value:string, 2:ROW__ID:struct] - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[_col0]] + keys: + 0 key (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0 + input vertices: + 1 Map 5 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[_col0]] + 1 [Column[key]] + keys: + 0 _col0 (type: int) + 1 key (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int + className: VectorMapJoinFullOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0 + input vertices: + 1 Map 6 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -14292,7 +17494,7 @@ STAGE PLANS: dataColumns: key:int, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 4 + Map 5 Map Operator Tree: TableScan alias: b @@ -14300,37 +17502,45 @@ STAGE PLANS: TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - projectedOutputColumnNums: [0] + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: _col0 (type: int) - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap 
LLAP IO: all inputs Map Vectorization: @@ -14348,7 +17558,7 @@ STAGE PLANS: dataColumns: key:int, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 5 + Map 6 Map Operator Tree: TableScan alias: c @@ -14359,13 +17569,25 @@ STAGE PLANS: Reduce Output Operator key expressions: key (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: key (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -14384,23 +17606,6 @@ STAGE PLANS: dataColumns: key:int, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] - Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Left Semi Join 0 to 1 - Outer Join 0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -14424,17 +17629,78 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Reducer 4 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: 
_col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Map Join Operator + condition map: + Full Outer Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[_col0]] + 1 [Column[key]] + keys: + 0 _col0 (type: int) + 1 key (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int + className: VectorMapJoinFullOuterIntersectLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0 + input vertices: + 1 Map 6 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + fullOuterIntersect: true + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Union 2 + Vertex: Union 2 Stage: Stage-0 Fetch Operator @@ -14496,10 +17762,10 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization detail debug select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization detail debug select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -14537,17 +17803,24 @@ STAGE PLANS: Map Join Operator condition map: Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[_col0]] keys: 0 key (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:string className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1] + nonOuterSmallTableKeyMapping: 
[] + projectedOutput: 0:int, 1:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ -14555,17 +17828,24 @@ STAGE PLANS: Map Join Operator condition map: Left Outer Join 0 to 1 + keyContext: [types [string], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + outer filter mappings: [[1, 0], null] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[_col1]] + 1 [Column[value]] keys: 0 _col1 (type: string) 1 value (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [1] - bigTableRetainedColumnNums: [0] - bigTableValueColumnNums: [0] + bigTableKeyColumns: 1:string + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int className: VectorMapJoinOuterStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0] + projectedOutput: 0:int + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0 input vertices: 1 Map 4 @@ -14573,12 +17853,13 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: false Statistics: Num rows: 25 Data size: 4776 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -14635,13 +17916,14 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -14671,13 +17953,14 @@ STAGE PLANS: Reduce Output Operator key expressions: value (type: string) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: value (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [1] + keyColumns: 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: true Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE Execution mode: 
vectorized, llap LLAP IO: all inputs @@ -14784,10 +18067,10 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization detail debug select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization detail debug select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -14832,17 +18115,24 @@ STAGE PLANS: Map Join Operator condition map: Left Semi Join 0 to 1 + keyContext: [types [string], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[_col1]] + 1 [Column[_col0]] keys: 0 _col1 (type: string) 1 _col0 (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [1] - bigTableRetainedColumnNums: [0] - bigTableValueColumnNums: [0] + bigTableKeyColumns: 1:string + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int className: VectorMapJoinLeftSemiStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0 input vertices: 1 Map 2 @@ -14912,13 +18202,14 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: true Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -14954,10 +18245,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t2 POSTHOOK: Input: default@t3 #### A masked pattern was here #### -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization detail debug select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization detail debug select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -14995,17 +18286,24 @@ STAGE PLANS: Map Join Operator condition map: Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[_col0]] keys: 0 key (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] - 
bigTableValueColumnNums: [0, 1] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:string className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int, 1:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ -15013,12 +18311,13 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + output key column names: KEY.reducesinkkey0, KEY.reducesinkkey1 Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:int, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: false Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -15075,13 +18374,14 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -15157,10 +18457,10 @@ POSTHOOK: Input: default@t2 10 val_10 4 val_4 8 val_8 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization detail debug select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization detail debug select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -15198,17 +18498,24 @@ STAGE PLANS: Map Join Operator condition map: Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[_col0]] keys: 0 key (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:string className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int, 1:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ -15216,12 +18523,13 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + output key column names: KEY.reducesinkkey0, KEY.reducesinkkey1 Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:int, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: false Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -15278,13 +18586,14 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -15362,10 +18671,10 @@ POSTHOOK: Input: default@t2 10 val_5 4 val_2 8 val_4 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization detail debug select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization detail debug select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -15403,17 +18712,24 @@ STAGE PLANS: Map Join Operator condition map: Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[_col0]] keys: 0 key (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:string className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int, 1:string + hashTableImplementationType: OPTIMIZED 
outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ -15421,12 +18737,13 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + output key column names: KEY.reducesinkkey0, KEY.reducesinkkey1 Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:int, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: false Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -15483,13 +18800,14 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: true Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -15559,10 +18877,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t4 #### A masked pattern was here #### -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization detail debug select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization detail debug select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -15600,17 +18918,24 @@ STAGE PLANS: Map Join Operator condition map: Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[_col1]] keys: 0 key (type: int) 1 _col1 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [1] - bigTableValueColumnNums: [1] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [1] + bigTableValueColumns: 1:string className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [1] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 1:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col1 input vertices: 1 Map 3 @@ -15626,12 +18951,13 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + output key column names: KEY.reducesinkkey0 Reduce Sink Vectorization: className: 
VectorReduceSinkObjectHashOperator - keyColumnNums: [1] + keyColumns: 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: false Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -15688,13 +19014,14 @@ STAGE PLANS: Reduce Output Operator key expressions: _col1 (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col1 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [1] + keyColumns: 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: true Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -15775,10 +19102,10 @@ val_5 val_5 val_8 val_9 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization detail debug select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization detail debug select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -15816,17 +19143,24 @@ STAGE PLANS: Map Join Operator condition map: Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[_col0]] keys: 0 key (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:string className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int, 1:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ -15834,12 +19168,13 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + output key column names: KEY.reducesinkkey0, KEY.reducesinkkey1 Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:int, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: false Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -15896,13 +19231,14 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: true Statistics: Num rows: 3 Data size: 564 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -15975,10 +19311,10 @@ POSTHOOK: Input: default@t2 0 val_0 0 val_0 0 val_0 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization detail debug select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization detail debug select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -16036,13 +19372,14 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: true Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -16079,17 +19416,24 @@ STAGE PLANS: Map Join Operator condition map: Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[_col0]] keys: 0 key (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [1] - bigTableValueColumnNums: [1] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [1] + bigTableValueColumns: 1:string className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [1] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 1:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col1 input vertices: 1 Map 1 @@ -16105,12 +19449,13 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort 
order: + + output key column names: KEY.reducesinkkey0 Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1] + keyColumns: 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: false Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -16183,10 +19528,10 @@ POSTHOOK: Input: default@t3 val_10 val_8 val_9 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization detail debug select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization detail debug select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -16244,13 +19589,14 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: true Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -16287,17 +19633,24 @@ STAGE PLANS: Map Join Operator condition map: Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[_col0]] keys: 0 key (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [1] - bigTableValueColumnNums: [1] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [1] + bigTableValueColumns: 1:string className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [1] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 1:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col1 input vertices: 1 Map 1 @@ -16313,12 +19666,13 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + output key column names: KEY.reducesinkkey0 Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1] + keyColumns: 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine 
tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: false Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -16342,24 +19696,230 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 +#### A masked pattern was here #### +POSTHOOK: query: select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 +#### A masked pattern was here #### +PREHOOK: query: explain vectorization detail debug +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail debug +select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) + Reducer 3 <- Map 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: t1 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: FilterLongColGreaterLongScalar(col 0:int, val 2) + predicate: (key > 2) (type: boolean) + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: 
int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: true + Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 2 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[_col0]] + keys: + 0 key (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:string + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int, 1:string + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1 + input vertices: + 1 Map 1 + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + output key column names: KEY.reducesinkkey0, 
KEY.reducesinkkey1 + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int, 1:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reducer 3 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:string + dataColumnCount: 2 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: string) - outputColumnNames: _col0 + expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) + outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] + projectedOutputColumnNums: [0, 1] Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -16378,21 +19938,26 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +PREHOOK: query: select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 #### A masked pattern was here #### -POSTHOOK: query: select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value +POSTHOOK: query: select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 #### A masked pattern was here #### -PREHOOK: query: explain vectorization detail -select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +10 val_5 +10 val_5 +10 val_5 +4 val_2 +8 val_4 +PREHOOK: query: explain vectorization detail debug +select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization detail debug +select /*+ mapjoin(b) */ 
a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -16407,14 +19972,82 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 2 <- Map 1 (BROADCAST_EDGE) - Reducer 3 <- Map 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan - alias: t1 + alias: a + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[_col0]] + keys: + 0 key (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0 + input vertices: + 1 Map 3 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 3 + Map Operator Tree: + TableScan + alias: b Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true @@ -16423,9 +20056,9 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterLongColGreaterLongScalar(col 0:int, val 2) - predicate: (key > 2) (type: boolean) - Statistics: Num 
rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 @@ -16433,7 +20066,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator @@ -16445,18 +20078,19 @@ STAGE PLANS: keys: _col0 (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + skipTag: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -16474,7 +20108,100 @@ STAGE PLANS: dataColumns: key:int, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 2 + Reducer 2 + Execution mode: vectorized, llap + Reduce Vectorization: + enabled: true + enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true + reduceColumnNullOrder: a + reduceColumnSortOrder: + + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +PREHOOK: type: QUERY +PREHOOK: Input: default@t1 +PREHOOK: Input: default@t3 +#### A masked pattern was here #### +POSTHOOK: query: select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t3 +#### A 
masked pattern was here #### +0 +0 +0 +0 +0 +0 +10 +10 +10 +10 +2 +4 +4 +5 +5 +5 +8 +8 +9 +PREHOOK: query: explain vectorization detail debug +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +PREHOOK: type: QUERY +POSTHOOK: query: explain vectorization detail debug +select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +POSTHOOK: type: QUERY +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Map 1 <- Map 3 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 Map Operator Tree: TableScan alias: a @@ -16492,30 +20219,38 @@ STAGE PLANS: Map Join Operator condition map: Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [GenericUDFOPMultiply(Const int 2, Column[_col0])] keys: 0 key (type: int) - 1 _col0 (type: int) + 1 (2 * _col0) (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:string className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int, 1:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1 input vertices: - 1 Map 1 + 1 Map 3 Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + output key column names: KEY.reducesinkkey0, KEY.reducesinkkey1 Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:int, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: false Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -16525,16 +20260,81 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 3 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: 
[0:key:int, 1:value:string, 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 3:int)(children: LongScalarMultiplyLongColumn(val 2, col 0:int) -> 3:int) + predicate: (2 * key) is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: (2 * _col0) (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: (2 * _col0) (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + keyExpressions: LongScalarMultiplyLongColumn(val 2, col 0:int) -> 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 2 - includeColumns: [0, 1] + includeColumns: [0] dataColumns: key:int, value:string partitionColumnCount: 0 - scratchColumnTypeNames: [] - Reducer 3 + scratchColumnTypeNames: [bigint] + Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -16575,26 +20375,25 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +PREHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 #### A masked pattern was here #### -POSTHOOK: query: select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value +POSTHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 #### A masked pattern was here #### -10 val_5 -10 val_5 -10 val_5 -4 val_2 -8 val_4 -PREHOOK: query: explain vectorization detail -select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +0 val_0 +0 val_0 +0 val_0 +8 val_8 +PREHOOK: query: explain vectorization detail debug +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value PREHOOK: type: QUERY 
-POSTHOOK: query: explain vectorization detail -select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +POSTHOOK: query: explain vectorization detail debug +select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -16609,7 +20408,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -16617,7 +20416,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] @@ -16627,35 +20426,80 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) - Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: - Left Semi Join 0 to 1 + Inner Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [string], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[key]] keys: 0 key (type: int) - 1 _col0 (type: int) + 1 key (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0] - bigTableValueColumnNums: [0] - className: VectorMapJoinLeftSemiLongOperator + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:string + className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0] - outputColumnNames: _col0 + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int, 1:string, 0:int, 3:string + smallTableValueMapping: 3:string + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col5, _col6 input vertices: 1 Map 3 - Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[_col5]] + 1 [Column[_col0]] + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1, 3] + bigTableValueColumns: 0:int, 1:string, 0:int, 3:string + className: 
VectorMapJoinLeftSemiLongOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int, 1:string, 0:int, 3:string + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 1 Map 4 Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 0, 3] + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + output key column names: KEY.reducesinkkey0, KEY.reducesinkkey1 + output value column names: VALUE._col0, VALUE._col1 + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int, 1:string + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 3:string + skipTag: false + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + value expressions: _col2 (type: int), _col3 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -16669,15 +20513,15 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 2 - includeColumns: [0] + includeColumns: [0, 1] dataColumns: key:int, value:string partitionColumnCount: 0 - scratchColumnTypeNames: [] + scratchColumnTypeNames: [string] Map 3 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] @@ -16687,7 +20531,54 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + output value column names: VALUE._col0 + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for 
keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:string + skipTag: true + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: string) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator + native: true + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 @@ -16695,7 +20586,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator @@ -16707,18 +20598,19 @@ STAGE PLANS: keys: _col0 (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + skipTag: true + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -16741,24 +20633,24 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: a - reduceColumnSortOrder: + + reduceColumnNullOrder: aa + reduceColumnSortOrder: ++ allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: - dataColumnCount: 1 - dataColumns: KEY.reducesinkkey0:int + dataColumnCount: 4 + dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string, VALUE._col0:int, VALUE._col1:string partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: int) - outputColumnNames: _col0 + expressions: KEY.reducesinkkey0 (type: int), 
KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: string) + outputColumnNames: _col0, _col1, _col2, _col3 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] + projectedOutputColumnNums: [0, 1, 2, 3] Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -16777,40 +20669,37 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +PREHOOK: query: select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value PREHOOK: type: QUERY PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key +POSTHOOK: query: select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 POSTHOOK: Input: default@t3 #### A masked pattern was here #### -0 -0 -0 -0 -0 -0 -10 -10 -10 -10 -2 -4 -4 -5 -5 -5 -8 -8 -9 -PREHOOK: query: explain vectorization detail -select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +0 val_0 0 val_0 +10 val_10 10 val_5 +10 val_10 10 val_5 +10 val_10 10 val_5 +4 val_4 4 val_2 +8 val_8 8 val_4 +PREHOOK: query: explain vectorization detail debug +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +POSTHOOK: query: explain vectorization detail debug +select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -16833,7 +20722,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] @@ -16841,37 +20730,45 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 1:string)) + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 20 Data size: 3760 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: Left Semi Join 0 to 1 + keyContext: [types [int, string], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key], Column[value]] + 1 [Column[_col0], Column[_col1]] keys: - 0 key (type: int) - 1 (2 * _col0) (type: int) + 0 key (type: int), value (type: string) + 1 
_col0 (type: int), _col1 (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] - className: VectorMapJoinLeftSemiLongOperator + bigTableKeyColumns: 0:int, 1:string + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:string + className: VectorMapJoinLeftSemiMultiKeyOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int, 1:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1 input vertices: 1 Map 3 - Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: string) sort order: ++ + output key column names: KEY.reducesinkkey0, KEY.reducesinkkey1 Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:int, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + skipTag: false + Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -16893,7 +20790,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: b - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] @@ -16901,41 +20798,41 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: SelectColumnIsNotNull(col 3:int)(children: LongScalarMultiplyLongColumn(val 2, col 0:int) -> 3:int) - predicate: (2 * key) is not null (type: boolean) - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 1:string)) + predicate: (key is not null and value is not null) (type: boolean) + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: key (type: int) - outputColumnNames: _col0 + expressions: key (type: int), value (type: string) + outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumnNums: [0, 1] + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int + keyExpressions: col 0:int, col 1:string native: false vectorProcessingMode: HASH 
projectedOutputColumnNums: [] - keys: _col0 (type: int) + keys: _col0 (type: int), _col1 (type: string) mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0, _col1 + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: (2 * _col0) (type: int) - sort order: + - Map-reduce partition columns: (2 * _col0) (type: int) + key expressions: _col0 (type: int), _col1 (type: string) + sort order: ++ + output key column names: KEY.reducesinkkey0, KEY.reducesinkkey1 + Map-reduce partition columns: _col0 (type: int), _col1 (type: string) Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [1] - keyExpressions: LongScalarMultiplyLongColumn(val 2, col 0:int) -> 1:int + className: VectorReduceSinkMultiKeyOperator + keyColumns: 0:int, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + skipTag: true + Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -16949,10 +20846,10 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 2 - includeColumns: [0] + includeColumns: [0, 1] dataColumns: key:int, value:string partitionColumnCount: 0 - scratchColumnTypeNames: [bigint] + scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: @@ -16976,13 +20873,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 12 Data size: 2274 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -16994,25 +20891,35 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +PREHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value PREHOOK: type: QUERY PREHOOK: Input: default@t1 -PREHOOK: Input: default@t2 +PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value +POSTHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 -POSTHOOK: Input: default@t2 +POSTHOOK: Input: default@t3 #### A masked pattern was here #### 0 val_0 0 val_0 0 val_0 +0 val_0 +0 val_0 +0 val_0 +10 val_10 +2 val_2 +4 val_4 +5 val_5 +5 val_5 +5 val_5 8 val_8 -PREHOOK: query: explain vectorization detail -select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, 
a.value +9 val_9 +PREHOOK: query: explain vectorization detail debug +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +POSTHOOK: query: explain vectorization detail debug +select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -17035,7 +20942,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] @@ -17045,46 +20952,68 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE Map Join Operator condition map: - Inner Join 0 to 1 - Left Semi Join 1 to 2 + Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[_col0]] keys: 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) + 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyExpressions: col 0:int - bigTableValueExpressions: col 0:int, col 1:string - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: One MapJoin Condition IS false - outputColumnNames: _col0, _col1, _col5, _col6 + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0 input vertices: 1 Map 3 - 2 Map 4 - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col5 (type: int), _col6 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Select Vectorization: - className: VectorSelectOperator + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], 
serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[_col0]] + 1 [Column[_col0]] + keys: + 0 _col0 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int + className: VectorMapJoinLeftSemiLongOperator native: true - projectedOutputColumnNums: [0, 1, 2, 3] - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2, 3] - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: int), _col3 (type: string) + skipTag: false + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -17093,20 +21022,20 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 2 - includeColumns: [0, 1] + includeColumns: [0] dataColumns: key:int, value:string partitionColumnCount: 0 - scratchColumnTypeNames: [string] + scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] @@ -17116,19 +21045,39 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS 
true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] - Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE - value expressions: value (type: string) + projectedOutputColumnNums: [0] + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -17137,12 +21086,12 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true + allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 2 - includeColumns: [0, 1] + includeColumns: [0] dataColumns: key:int, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] @@ -17150,7 +21099,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: c - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] @@ -17160,7 +21109,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 0:int) predicate: key is not null (type: boolean) - Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: key (type: int) outputColumnNames: _col0 @@ -17168,7 +21117,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator @@ -17180,18 +21129,19 @@ STAGE PLANS: keys: _col0 (type: int) mode: hash outputColumnNames: _col0 - Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + skipTag: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -17214,31 +21164,31 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: aa - reduceColumnSortOrder: ++ + reduceColumnNullOrder: a + reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: - dataColumnCount: 4 - dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string, VALUE._col0:int, VALUE._col1:string + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string), VALUE._col0 (type: int), VALUE._col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1, 2, 3] - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumnNums: [0] + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -17250,37 +21200,37 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +PREHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value +POSTHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 POSTHOOK: Input: default@t3 #### A masked pattern was here #### -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -0 val_0 0 val_0 -10 val_10 10 val_5 -10 val_10 10 val_5 -10 val_10 10 val_5 -4 val_4 4 val_2 -8 val_8 8 val_4 -PREHOOK: query: explain vectorization detail -select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +0 +0 +0 +0 +0 +0 +10 +10 +10 +10 +4 +4 +8 +8 +PREHOOK: query: explain vectorization detail debug +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on 
b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +POSTHOOK: query: explain vectorization detail debug +select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -17295,7 +21245,7 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -17303,45 +21253,79 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Filter Operator - Filter Vectorization: - className: VectorFilterOperator + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + outer filter mappings: [[1, 0], null] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[key]] + keys: + 0 key (type: int) + 1 key (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int + className: VectorMapJoinOuterLongOperator native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 1:string)) - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 20 Data size: 3760 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - keys: - 0 key (type: int), value (type: string) - 1 _col0 (type: int), _col1 (type: string) - Map Join Vectorization: - bigTableKeyColumnNums: [0, 1] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] - className: VectorMapJoinLeftSemiMultiKeyOperator + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + outerSmallTableKeyMapping: 0 -> 3 + projectedOutput: 0:int, 3:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0, _col5 + input vertices: + 1 Map 3 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1] - outputColumnNames: _col0, _col1 - input vertices: - 1 Map 3 - Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int), _col1 
(type: string) - sort order: ++ - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + predicateExpression: SelectColumnIsNotNull(col 3:int) + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[_col5]] + 1 [Column[_col0]] + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 3:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int + className: VectorMapJoinLeftSemiLongOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -17355,15 +21339,52 @@ STAGE PLANS: vectorized: true rowBatchContext: dataColumnCount: 2 - includeColumns: [0, 1] + includeColumns: [0] dataColumns: key:int, value:string partitionColumnCount: 0 - scratchColumnTypeNames: [] + scratchColumnTypeNames: [bigint] Map 3 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] + Reduce Output Operator + key expressions: key (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true + skipTag: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0] + dataColumns: key:int, value:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 4 + Map Operator Tree: + TableScan + alias: c + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] @@ -17371,40 +21392,41 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 0:int), SelectColumnIsNotNull(col 1:string)) - predicate: (key is not null and value is not null) (type: boolean) - Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int), value (type: string) - outputColumnNames: _col0, _col1 + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumnNums: [0] + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Group By Operator Group By Vectorization: className: VectorGroupByOperator groupByMode: HASH - keyExpressions: col 0:int, col 1:string + keyExpressions: col 0:int native: false vectorProcessingMode: HASH projectedOutputColumnNums: [] - keys: _col0 (type: int), _col1 (type: string) + keys: _col0 (type: int) mode: hash - outputColumnNames: _col0, _col1 - Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col0 (type: int), _col1 (type: string) - sort order: ++ - Map-reduce partition columns: _col0 (type: int), _col1 (type: string) + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: - className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1] + className: VectorReduceSinkLongOperator + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 11 Data size: 2068 Basic stats: COMPLETE Column stats: NONE + skipTag: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -17418,7 +21440,7 @@ STAGE PLANS: 
vectorized: true rowBatchContext: dataColumnCount: 2 - includeColumns: [0, 1] + includeColumns: [0] dataColumns: key:int, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] @@ -17427,31 +21449,31 @@ STAGE PLANS: Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true - reduceColumnNullOrder: aa - reduceColumnSortOrder: ++ + reduceColumnNullOrder: a + reduceColumnSortOrder: + allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: - dataColumnCount: 2 - dataColumns: KEY.reducesinkkey0:int, KEY.reducesinkkey1:string + dataColumnCount: 1 + dataColumns: KEY.reducesinkkey0:int partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: Select Operator - expressions: KEY.reducesinkkey0 (type: int), KEY.reducesinkkey1 (type: string) - outputColumnNames: _col0, _col1 + expressions: KEY.reducesinkkey0 (type: int) + outputColumnNames: _col0 Select Vectorization: className: VectorSelectOperator native: true - projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE + projectedOutputColumnNums: [0] + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 22 Data size: 4136 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -17463,35 +21485,49 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +PREHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 +PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value +POSTHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 +POSTHOOK: Input: default@t2 POSTHOOK: Input: default@t3 #### A masked pattern was here #### -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -0 val_0 -10 val_10 -2 val_2 -4 val_4 -5 val_5 -5 val_5 -5 val_5 -8 val_8 -9 val_9 -PREHOOK: query: explain vectorization detail -select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +10 +10 +10 +10 +4 +4 +8 +8 +PREHOOK: query: explain vectorization detail debug +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail debug +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -17506,55 
+21542,30 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Filter Operator - Filter Vectorization: - className: VectorFilterOperator + Reduce Output Operator + key expressions: key (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) - predicate: key is not null (type: boolean) - Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE - Map Join Operator - condition map: - Left Semi Join 0 to 1 - Left Semi Join 0 to 2 - keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 _col0 (type: int) - Map Join Vectorization: - bigTableKeyExpressions: col 0:int - bigTableValueExpressions: col 0:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: One MapJoin Condition IS false - outputColumnNames: _col0 - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -17563,7 +21574,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -17572,52 +21583,26 @@ STAGE PLANS: dataColumns: key:int, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE 
TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Filter Operator - Filter Vectorization: - className: VectorFilterOperator + Reduce Output Operator + key expressions: key (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int native: true - predicateExpression: SelectColumnIsNotNull(col 0:int) - predicate: key is not null (type: boolean) - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator - native: true - projectedOutputColumnNums: [0] - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -17626,7 +21611,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -17635,7 +21620,7 @@ STAGE PLANS: dataColumns: key:int, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 4 + Map 5 Map Operator Tree: TableScan alias: c @@ -17673,13 +21658,14 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -17699,6 +21685,48 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] 
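The Reducer 2 vertex added below is the notable part of this plan: unlike the LEFT SEMI and LEFT OUTER variants above, the FULL OUTER JOIN is not converted into a broadcast map join. Both inputs shuffle into the reducer over SIMPLE_EDGEs, the join runs there as a Merge Join Operator that the plan reports as unvectorized (enableConditionsNotMet: Vectorizing MergeJoin Supported IS false), and only the downstream semi join against the broadcast Map 5 input still executes as a native Map Join inside that reducer. A minimal qfile sketch that reproduces this plan shape, assuming the same t1/t2/t3 test tables this file already uses and only stock properties named in the nativeConditionsMet lists above:

-- Sketch only: reproduce the reducer-side merge-join plan shape.
set hive.execution.engine=tez;
set hive.vectorized.execution.enabled=true;
set hive.vectorized.execution.reduce.enabled=true;

explain vectorization detail
select a.key
from t1 a
full outer join t3 b on a.key = b.key
left semi join t2 c on b.key = c.key
sort by a.key;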
Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + outer filter mappings: [[1, 0], [0, 0]] + keyExpressions: + 0 [Column[key]] + 1 [Column[key]] + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col5 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[_col5]] + 1 [Column[_col0]] + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 5 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + skipTag: false + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -17721,13 +21749,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 46 Data size: 184 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -17739,13 +21767,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +PREHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key +POSTHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -17757,6 +21785,18 @@ POSTHOOK: Input: default@t3 0 0 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 10 10 10 @@ -17765,11 +21805,14 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain vectorization detail -select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +NULL +NULL +NULL +PREHOOK: query: explain vectorization detail debug +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY 
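The three NULL rows just above are what the FULL OUTER variant adds over the earlier LEFT OUTER run: t3 rows with no matching t1 key produce a NULL a.key, and they survive the semi join whenever their t3 key also appears in t2 (t1-only rows, by contrast, are dropped because the plan's "_col5 is not null" filter rejects a NULL b.key before the semi join). A hypothetical follow-up query, not part of this qfile, that would list the b-side keys behind those NULL rows, assuming the same tables:

-- Hypothetical diagnostic (not in the test): show which t3 keys
-- produced the NULL a.key rows in the result above.
select b.key
from t1 a
full outer join t3 b on a.key = b.key
left semi join t2 c on b.key = c.key
where a.key is null
sort by b.key;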
-POSTHOOK: query: explain vectorization detail -select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail debug +select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -17784,48 +21827,30 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) - Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (BROADCAST_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Map Join Operator - condition map: - Left Outer Join 0 to 1 - Left Semi Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - Map Join Vectorization: - bigTableKeyExpressions: col 0:int - bigTableValueExpressions: col 0:int - className: VectorMapJoinOperator - native: false - nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - nativeConditionsNotMet: One MapJoin Condition IS false - outputColumnNames: _col0 - input vertices: - 1 Map 3 - 2 Map 4 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Reduce Sink Vectorization: - className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: key (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -17834,7 +21859,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -17843,25 +21868,26 @@ STAGE PLANS: dataColumns: key:int, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 3 + Map 4 Map Operator Tree: TableScan alias: b - 
Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] Reduce Output Operator key expressions: key (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: key (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + skipTag: false + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -17879,7 +21905,7 @@ STAGE PLANS: dataColumns: key:int, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 4 + Map 5 Map Operator Tree: TableScan alias: c @@ -17887,37 +21913,45 @@ STAGE PLANS: TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - projectedOutputColumnNums: [0] + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: _col0 (type: int) - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -17936,6 +21970,48 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Merge Join Operator + condition map: + Full Outer Join 0 to 1 + outer filter mappings: [[1, 0], [0, 0]] + keyExpressions: + 0 [Column[key]] + 1 [Column[key]] + keys: + 0 key (type: int) + 1 key (type: int) + outputColumnNames: _col0, _col5 + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: _col5 is not null (type: boolean) + Statistics: Num rows: 24 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[_col5]] + 1 [Column[_col0]] + keys: + 0 _col5 (type: int) + 1 _col0 (type: int) + outputColumnNames: _col0 + input vertices: + 1 Map 5 + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + skipTag: false + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false + Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -17958,13 +22034,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 26 Data size: 105 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -17976,13 +22052,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -18014,11 +22090,14 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: 
explain vectorization detail -select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +NULL +NULL +NULL +PREHOOK: query: explain vectorization detail debug +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail debug +select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -18033,29 +22112,86 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) - Reducer 3 <- Reducer 2 (SIMPLE_EDGE) + Map 1 <- Map 3 (BROADCAST_EDGE), Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: a - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[_col0]] + keys: + 0 key (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0 + input vertices: + 1 Map 3 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + outer filter 
mappings: [[1, 0], null] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[_col0]] + 1 [Column[key]] + keys: + 0 _col0 (type: int) + 1 key (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int + className: VectorMapJoinOuterLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + projectedOutput: 0:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Reduce Sink Vectorization: + className: VectorReduceSinkObjectHashOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -18073,25 +22209,53 @@ STAGE PLANS: dataColumns: key:int, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 4 + Map 3 Map Operator Tree: TableScan alias: b - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: key is not null (type: boolean) + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int) + outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce 
Output Operator + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -18100,7 +22264,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: true + allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -18109,7 +22273,7 @@ STAGE PLANS: dataColumns: key:int, value:string partitionColumnCount: 0 scratchColumnTypeNames: [] - Map 5 + Map 4 Map Operator Tree: TableScan alias: c @@ -18117,37 +22281,18 @@ STAGE PLANS: TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator + Reduce Output Operator + key expressions: key (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int native: true - projectedOutputColumnNums: [0] + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: _col0 (type: int) - mode: hash - outputColumnNames: _col0 - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -18156,7 +22301,7 @@ STAGE PLANS: inputFormatFeatureSupport: [] featureSupportInUse: [] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat - allNative: false + allNative: true usesVectorUDFAdaptor: false vectorized: true rowBatchContext: @@ -18166,23 +22311,6 @@ STAGE PLANS: partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 - Execution mode: llap - Reduce Operator Tree: - Merge Join Operator - condition map: - Outer Join 0 to 1 - 
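The Map 3 subtree above shows how the LEFT SEMI build side is prepared: the key is projected, deduplicated with a hash-mode Group By, and only then broadcast. A rough logical equivalent, as a sketch over the same assumed fixtures:

-- A LEFT SEMI JOIN matches against the distinct keys of its right side,
-- which is why the plan runs a Group By (mode: hash) on t2's keys before the broadcast.
SELECT a.key
FROM t3 a
JOIN (SELECT DISTINCT key FROM t2) b ON a.key = b.key;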
Left Semi Join 1 to 2 - keys: - 0 key (type: int) - 1 key (type: int) - 2 _col0 (type: int) - outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE - Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: enabled: true @@ -18205,13 +22333,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -18223,13 +22351,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key +POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -18257,18 +22385,18 @@ POSTHOOK: Input: default@t3 10 10 10 +16 +18 +20 4 4 8 8 -NULL -NULL -NULL -PREHOOK: query: explain vectorization detail -select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: explain vectorization detail debug +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail debug +select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -18298,13 +22426,14 @@ STAGE PLANS: Reduce Output Operator key expressions: key (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: key (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: false Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -18354,13 +22483,14 @@ STAGE 
PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: false Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -18390,13 +22520,14 @@ STAGE PLANS: Reduce Output Operator key expressions: key (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: key (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: false Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -18421,7 +22552,11 @@ STAGE PLANS: Merge Join Operator condition map: Left Semi Join 0 to 1 - Left Outer Join 0 to 2 + Right Outer Join 0 to 2 + keyExpressions: + 0 [Column[key]] + 1 [Column[_col0]] + 2 [Column[key]] keys: 0 key (type: int) 1 _col0 (type: int) @@ -18431,7 +22566,12 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + output key column names: KEY.reducesinkkey0 + skipTag: false Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -18473,13 +22613,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -18507,18 +22647,20 @@ POSTHOOK: Input: default@t3 10 10 10 -16 -18 -20 4 4 8 8 -PREHOOK: query: explain vectorization detail -select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +NULL +NULL +NULL +NULL +NULL +PREHOOK: query: explain vectorization detail debug +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail -select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on 
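In the Reducer 2 hunk above, the semi and right-outer conditions stay together in one reduce-side Merge Join, which reports "Vectorizing MergeJoin Supported IS false". A sketch of the statement being explained, restated from the golden file:

-- Combined LEFT SEMI + RIGHT OUTER condition map; the Merge Join itself does not vectorize.
EXPLAIN VECTORIZATION DETAIL
SELECT a.key
FROM t3 a
LEFT SEMI JOIN t2 b ON a.key = b.key
RIGHT OUTER JOIN t1 c ON a.key = c.key
SORT BY a.key;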
a.key = c.key sort by a.key +POSTHOOK: query: explain vectorization detail debug +select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: enabled: true @@ -18533,7 +22675,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Map 1 <- Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -18545,17 +22688,50 @@ STAGE PLANS: TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[_col0]] + keys: + 0 key (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -18581,37 +22757,45 @@ STAGE PLANS: TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 
2:ROW__ID:struct] - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - projectedOutputColumnNums: [0] + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: _col0 (type: int) - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -18640,13 +22824,14 @@ STAGE PLANS: Reduce Output Operator key expressions: key (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: key (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: false Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -18670,18 +22855,25 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 - Right Outer Join 0 to 2 + Full Outer Join 0 to 1 + outer filter mappings: [[1, 0], [0, 0]] + keyExpressions: + 0 [Column[_col0]] + 1 [Column[key]] keys: - 0 
key (type: int) - 1 _col0 (type: int) - 2 key (type: int) + 0 _col0 (type: int) + 1 key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + output key column names: KEY.reducesinkkey0 + skipTag: false + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -18705,13 +22897,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -18723,13 +22915,13 @@ STAGE PLANS: Processor Tree: ListSink -PREHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +PREHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 PREHOOK: Input: default@t3 #### A masked pattern was here #### -POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key +POSTHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 @@ -18757,19 +22949,30 @@ POSTHOOK: Input: default@t3 10 10 10 +10 +10 +10 +10 +10 +10 +10 +10 +2 4 4 +5 +5 +5 8 8 +9 NULL NULL NULL -NULL -NULL -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization detail debug select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization detail debug select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -18785,7 +22988,8 @@ STAGE PLANS: Tez #### A masked pattern was here #### Edges: - Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 4 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) + Map 1 <- Map 4 (BROADCAST_EDGE) + Reducer 2 <- Map 1 (SIMPLE_EDGE), Map 5 (SIMPLE_EDGE) Reducer 3 <- Reducer 2 (SIMPLE_EDGE) #### A masked pattern was here #### Vertices: @@ -18797,17 +23001,50 @@ STAGE PLANS: TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Reduce Output Operator - key expressions: key (type: int) - sort order: + - Map-reduce partition columns: key (type: int) - Reduce Sink Vectorization: - className: 
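Here the plan splits the work: Map 1 evaluates the LEFT SEMI join as a broadcast MapJoin (VectorMapJoinLeftSemiLongOperator), leaving only the FULL OUTER condition for the non-vectorized Merge Join in Reducer 2. A sketch of the statement under EXPLAIN, restated from the golden file:

EXPLAIN VECTORIZATION DETAIL
SELECT a.key
FROM t3 a
LEFT SEMI JOIN t1 b ON a.key = b.key
FULL OUTER JOIN t2 c ON a.key = c.key
SORT BY a.key;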
VectorReduceSinkLongOperator - keyColumnNums: [0] + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] - Statistics: Num rows: 22 Data size: 88 Basic stats: COMPLETE Column stats: NONE + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: key is not null (type: boolean) + Statistics: Num rows: 21 Data size: 84 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[_col0]] + keys: + 0 key (type: int) + 1 _col0 (type: int) + Map Join Vectorization: + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int + className: VectorMapJoinLeftSemiLongOperator + native: true + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int + hashTableImplementationType: OPTIMIZED + outputColumnNames: _col0 + input vertices: + 1 Map 4 + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: false + Statistics: Num rows: 23 Data size: 92 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -18833,37 +23070,45 @@ STAGE PLANS: TableScan Vectorization: native: true vectorizationSchemaColumns: [0:key:int, 1:value:string, 2:ROW__ID:struct] - Select Operator - expressions: key (type: int) - outputColumnNames: _col0 - Select Vectorization: - className: VectorSelectOperator + Filter Operator + Filter Vectorization: + className: VectorFilterOperator native: true - projectedOutputColumnNums: [0] + predicateExpression: SelectColumnIsNotNull(col 0:int) + predicate: key is not null (type: boolean) Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE - Group By Operator - Group By Vectorization: - className: VectorGroupByOperator - groupByMode: HASH - keyExpressions: col 0:int - native: false - vectorProcessingMode: HASH - projectedOutputColumnNums: [] - keys: _col0 (type: int) - mode: hash + Select Operator + expressions: key (type: int) outputColumnNames: _col0 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0] Statistics: Num rows: 11 Data size: 
44 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: int) - sort order: + - Map-reduce partition columns: _col0 (type: int) - Reduce Sink Vectorization: - className: VectorReduceSinkLongOperator - keyColumnNums: [0] - native: true - nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + Group By Operator + Group By Vectorization: + className: VectorGroupByOperator + groupByMode: HASH + keyExpressions: col 0:int + native: false + vectorProcessingMode: HASH + projectedOutputColumnNums: [] + keys: _col0 (type: int) + mode: hash + outputColumnNames: _col0 Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + Map-reduce partition columns: _col0 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + skipTag: true + Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: @@ -18892,13 +23137,14 @@ STAGE PLANS: Reduce Output Operator key expressions: key (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: key (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: false Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -18922,18 +23168,25 @@ STAGE PLANS: Reduce Operator Tree: Merge Join Operator condition map: - Left Semi Join 0 to 1 - Outer Join 0 to 2 + Full Outer Join 0 to 1 + outer filter mappings: [[1, 0], [0, 0]] + keyExpressions: + 0 [Column[_col0]] + 1 [Column[key]] keys: - 0 key (type: int) - 1 _col0 (type: int) - 2 key (type: int) + 0 _col0 (type: int) + 1 key (type: int) outputColumnNames: _col0 - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: int) sort order: + - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + output key column names: KEY.reducesinkkey0 + skipTag: false + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -18957,13 +23210,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 48 Data size: 
193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 48 Data size: 193 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 25 Data size: 101 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -19029,10 +23282,10 @@ POSTHOOK: Input: default@t3 NULL NULL NULL -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization detail debug select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization detail debug select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -19070,17 +23323,24 @@ STAGE PLANS: Map Join Operator condition map: Left Semi Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[_col0]] keys: 0 key (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:string className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int, 1:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1 input vertices: 1 Map 3 @@ -19088,17 +23348,24 @@ STAGE PLANS: Map Join Operator condition map: Left Outer Join 0 to 1 + keyContext: [types [string], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + outer filter mappings: [[1, 0], null] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[_col1]] + 1 [Column[value]] keys: 0 _col1 (type: string) 1 value (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [1] - bigTableRetainedColumnNums: [0] - bigTableValueColumnNums: [0] + bigTableKeyColumns: 1:string + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int className: VectorMapJoinOuterStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0] + projectedOutput: 0:int + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0 input vertices: 1 Map 4 @@ 
-19106,12 +23373,13 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: false Statistics: Num rows: 25 Data size: 4776 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -19168,13 +23436,14 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: int) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: true Statistics: Num rows: 11 Data size: 44 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -19204,13 +23473,14 @@ STAGE PLANS: Reduce Output Operator key expressions: value (type: string) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: value (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [1] + keyColumns: 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: true Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs @@ -19317,10 +23587,10 @@ POSTHOOK: Input: default@t3 4 8 8 -PREHOOK: query: explain vectorization detail +PREHOOK: query: explain vectorization detail debug select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 PREHOOK: type: QUERY -POSTHOOK: query: explain vectorization detail +POSTHOOK: query: explain vectorization detail debug select a.key from t3 a left semi join t2 b on a.value = b.value where a.key > 100 POSTHOOK: type: QUERY PLAN VECTORIZATION: @@ -19365,17 +23635,24 @@ STAGE PLANS: Map Join Operator condition map: Left Semi Join 0 to 1 + keyContext: [types [string], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + valueContexts: [1:[types [], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[_col1]] + 1 [Column[_col0]] keys: 0 _col1 (type: string) 1 _col0 (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [1] - bigTableRetainedColumnNums: [0] - bigTableValueColumnNums: [0] + bigTableKeyColumns: 1:string + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int className: VectorMapJoinLeftSemiStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0 input vertices: 1 Map 2 @@ -19445,13 +23722,14 @@ STAGE PLANS: Reduce Output Operator key expressions: _col0 (type: string) sort order: + + output key column names: KEY.reducesinkkey0 Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] + skipTag: true Statistics: Num rows: 11 Data size: 2024 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: all inputs diff --git ql/src/test/results/clientpositive/llap/vector_like_2.q.out ql/src/test/results/clientpositive/llap/vector_like_2.q.out index 8e132a7..1db8164 100644 --- ql/src/test/results/clientpositive/llap/vector_like_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_like_2.q.out @@ -63,10 +63,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2] + valueColumns: 2:boolean Statistics: Num rows: 3 Data size: 279 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: boolean) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_llap_text_1.q.out ql/src/test/results/clientpositive/llap/vector_llap_text_1.q.out index d5d8e53..ec65f98 100644 --- ql/src/test/results/clientpositive/llap/vector_llap_text_1.q.out +++ ql/src/test/results/clientpositive/llap/vector_llap_text_1.q.out @@ -166,11 +166,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [1] + partitionColumns: 0:int + valueColumns: 1:string Statistics: Num rows: 242 Data size: 22990 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: vectorized, llap @@ -222,14 +222,16 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [1] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [1] + bigTableValueColumns: 1:string className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 4, 1] - smallTableMapping: [4] + nonOuterSmallTableKeyMapping: [0] + projectedOutput: 0:int, 4:string, 1:string + smallTableValueMapping: 4:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col3 input vertices: 0 Map 1 @@ -247,10 +249,9 @@ STAGE PLANS: sort order: +++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 4, 1] + keyColumns: 0:int, 4:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 391 Data size: 72726 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out index 37821fb..8e153a4 100644 --- ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out +++ ql/src/test/results/clientpositive/llap/vector_mapjoin_reduce.q.out @@ -203,6 +203,7 @@ STAGE PLANS: className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col3 input vertices: 1 Map 3 @@ -217,6 +218,7 @@ STAGE PLANS: className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col3 input vertices: 1 Map 4 @@ -477,6 +479,7 @@ STAGE PLANS: className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col3, _col4 input vertices: 1 Map 3 @@ -491,6 +494,7 @@ STAGE PLANS: className: VectorMapJoinLeftSemiMultiKeyOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col3 input vertices: 1 Map 4 diff --git ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out 
ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out index 7c1cbb6..4e8a174 100644 --- ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out +++ ql/src/test/results/clientpositive/llap/vector_mr_diff_schema_alias.q.out @@ -351,6 +351,9 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col1 (type: int) Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: llap Reduce Operator Tree: @@ -375,6 +378,9 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: NONE TopN Hash Memory Usage: 0.1 value expressions: _col1 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 4 Execution mode: vectorized, llap Reduce Vectorization: diff --git ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out index 9801470..f2e23ff 100644 --- ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out +++ ql/src/test/results/clientpositive/llap/vector_nullsafe_join.q.out @@ -840,26 +840,142 @@ NULL 35 NULL NULL NULL NULL 10 NULL NULL NULL 48 NULL NULL NULL NULL NULL -PREHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key<=>b.value +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL DEBUG +SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key<=>b.value PREHOOK: type: QUERY -PREHOOK: Input: default@myinput1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key<=>b.value +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL DEBUG +SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key<=>b.value POSTHOOK: type: QUERY -POSTHOOK: Input: default@myinput1 +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez #### A masked pattern was here #### -10 NULL NULL 10 -100 100 100 100 -NULL 10 10 NULL -NULL 10 48 NULL -NULL 10 NULL NULL -NULL 35 10 NULL -NULL 35 48 NULL -NULL 35 NULL NULL -NULL NULL 10 NULL -NULL NULL 48 NULL -NULL NULL NULL 35 -NULL NULL NULL NULL + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:ROW__ID:struct] + Reduce Output Operator + key expressions: key (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + output value column names: VALUE._col0 + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:int + skipTag: true + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: int) + Execution mode: vectorized, llap + LLAP IO: all 
inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:ROW__ID:struct] + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + outer filter mappings: [null, [0, 0]] + valueContexts: [0:[types [int], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[value]] + keys: + 0 key (type: int) + 1 value (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 1:int + bigTableValueExpressions: col 0:int, col 1:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: hive.vectorized.execution.mapjoin.native.enabled IS false, No nullsafe IS false + nullSafes: [true] + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 0 Map 1 + Statistics: Num rows: 6 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 6 Data size: 52 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 52 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key<=>b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 @@ -1733,26 +1849,142 @@ NULL 35 NULL NULL NULL NULL 10 NULL NULL NULL 48 NULL NULL NULL NULL NULL -PREHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON 
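The vector_nullsafe_join plan above lands on the row-mode VectorMapJoinOperator: the null-safe key fails the "No nullsafe" native condition, so the join does not use the native vectorized operators even when hive.vectorized.execution.mapjoin.native.enabled is on, as the second block below shows. A sketch of the semantics, assuming myinput1 matches the (key int, value int) schema shown in the plan:

-- <=> is null-safe equality: NULL <=> NULL evaluates to true, unlike NULL = NULL.
-- That is what the plan's nullSafes: [true] flag records.
SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key <=> b.value;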
a.key<=>b.value +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL DEBUG +SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key<=>b.value PREHOOK: type: QUERY -PREHOOK: Input: default@myinput1 -#### A masked pattern was here #### -POSTHOOK: query: SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key<=>b.value +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL DEBUG +SELECT * FROM myinput1 a RIGHT OUTER JOIN myinput1 b ON a.key<=>b.value POSTHOOK: type: QUERY -POSTHOOK: Input: default@myinput1 +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez #### A masked pattern was here #### -10 NULL NULL 10 -100 100 100 100 -NULL 10 10 NULL -NULL 10 48 NULL -NULL 10 NULL NULL -NULL 35 10 NULL -NULL 35 48 NULL -NULL 35 NULL NULL -NULL NULL 10 NULL -NULL NULL 48 NULL -NULL NULL NULL 35 -NULL NULL NULL NULL + Edges: + Map 2 <- Map 1 (BROADCAST_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: a + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:ROW__ID:struct] + Reduce Output Operator + key expressions: key (type: int) + sort order: + + output key column names: KEY.reducesinkkey0 + output value column names: VALUE._col0 + Map-reduce partition columns: key (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 0:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 1:int + skipTag: true + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + value expressions: value (type: int) + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:int + partitionColumnCount: 0 + scratchColumnTypeNames: [] + Map 2 + Map Operator Tree: + TableScan + alias: b + Statistics: Num rows: 6 Data size: 48 Basic stats: COMPLETE Column stats: NONE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:ROW__ID:struct] + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keyContext: [types [int], serde=org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe, hasFilter=false] + outer filter mappings: [null, [0, 0]] + valueContexts: [0:[types [int], serde=org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe, hasFilter=false]] + keyExpressions: + 0 [Column[key]] + 1 [Column[value]] + keys: + 0 key (type: int) + 1 value (type: int) + Map Join Vectorization: + bigTableKeyExpressions: col 1:int + bigTableValueExpressions: col 0:int, col 1:int + className: VectorMapJoinOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: No nullsafe IS false + nullSafes: [true] + outputColumnNames: _col0, _col1, _col5, _col6 + input vertices: + 0 Map 1 + Statistics: Num rows: 6 Data size: 52 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: int), _col1 (type: int), _col5 (type: int), _col6 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2, 3] + Statistics: Num rows: 6 Data size: 52 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false + Statistics: Num rows: 6 Data size: 52 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true + inputFormatFeatureSupport: [] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 2 + includeColumns: [0, 1] + dataColumns: key:int, value:int + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint] + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + PREHOOK: query: SELECT * FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key<=>b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 diff --git ql/src/test/results/clientpositive/llap/vector_order_null.q.out ql/src/test/results/clientpositive/llap/vector_order_null.q.out index 58566df..fe2eafe 100644 --- ql/src/test/results/clientpositive/llap/vector_order_null.q.out +++ ql/src/test/results/clientpositive/llap/vector_order_null.q.out @@ -116,10 +116,9 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:int, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -239,10 +238,9 @@ STAGE PLANS: sort order: -+ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:int, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -362,10 +360,9 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: 
VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:string, 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -485,10 +482,9 @@ STAGE PLANS: sort order: -+ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:string, 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -608,10 +604,9 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:int, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -731,10 +726,9 @@ STAGE PLANS: sort order: -+ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:int, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -854,10 +848,9 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:string, 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -977,10 +970,9 @@ STAGE PLANS: sort order: -+ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:string, 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -1100,10 +1092,9 @@ STAGE PLANS: sort 
order: +- Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:int, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -1223,10 +1214,9 @@ STAGE PLANS: sort order: -- Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:string, 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -1346,10 +1336,9 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:string, 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 6 Data size: 364 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs diff --git ql/src/test/results/clientpositive/llap/vector_outer_join0.q.out ql/src/test/results/clientpositive/llap/vector_outer_join0.q.out index 50e6a85..71a97c3 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_join0.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_join0.q.out @@ -107,15 +107,16 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [1] - bigTableOuterKeyMapping: 1 -> 3 - bigTableRetainedColumnNums: [0, 1, 3] - bigTableValueColumnNums: [0, 1] + bigTableKeyColumns: 1:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:string, 1:int className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1, 3, 4] - smallTableMapping: [4] + outerSmallTableKeyMapping: 1 -> 3 + projectedOutput: 0:string, 1:int, 3:int, 4:string + smallTableValueMapping: 4:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 2 @@ -169,10 +170,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS 
true - valueColumnNums: [1] + valueColumns: 1:string Statistics: Num rows: 6 Data size: 554 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: vectorized, llap @@ -259,10 +260,10 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [1] + keyColumns: 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:string Statistics: Num rows: 6 Data size: 554 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string) Execution mode: vectorized, llap @@ -305,15 +306,16 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableOuterKeyMapping: 0 -> 4 - bigTableRetainedColumnNums: [0, 1, 4] - bigTableValueColumnNums: [0, 1] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:string className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [3, 4, 0, 1] - smallTableMapping: [3] + outerSmallTableKeyMapping: 0 -> 4 + projectedOutput: 3:string, 4:int, 0:int, 1:string + smallTableValueMapping: 3:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 0 Map 1 diff --git ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out index 4901e83..5c91324 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out @@ -267,15 +267,16 @@ STAGE PLANS: 0 _col2 (type: int) 1 _col2 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [2] - bigTableOuterKeyMapping: 2 -> 15 - bigTableRetainedColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 15] - bigTableValueColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + bigTableKeyColumns: 2:int + bigTableRetainColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + bigTableValueColumns: 0:tinyint, 1:smallint, 2:int, 3:bigint, 4:float, 5:double, 6:string, 7:string, 8:timestamp, 9:timestamp, 10:boolean, 11:boolean className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24] - smallTableMapping: [13, 14, 16, 17, 18, 19, 20, 21, 22, 23, 24] + outerSmallTableKeyMapping: 2 -> 15 + projectedOutput: 0:tinyint, 1:smallint, 2:int, 3:bigint, 4:float, 5:double, 6:string, 7:string, 8:timestamp, 9:timestamp, 10:boolean, 11:boolean, 13:tinyint, 14:smallint, 15:int, 16:bigint, 
17:float, 18:double, 19:string, 20:string, 21:timestamp, 22:timestamp, 23:boolean, 24:boolean + smallTableValueMapping: 13:tinyint, 14:smallint, 16:bigint, 17:float, 18:double, 19:string, 20:string, 21:timestamp, 22:timestamp, 23:boolean, 24:boolean + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 input vertices: 1 Map 2 @@ -329,10 +330,10 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [2] + keyColumns: 2:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 3, 4, 5, 6, 7, 8, 9, 10, 11] + valueColumns: 0:tinyint, 1:smallint, 3:bigint, 4:float, 5:double, 6:string, 7:string, 8:timestamp, 9:timestamp, 10:boolean, 11:boolean Statistics: Num rows: 15 Data size: 3697 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) Execution mode: vectorized, llap @@ -443,13 +444,14 @@ STAGE PLANS: 0 _col0 (type: tinyint) 1 _col0 (type: tinyint) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0] - bigTableValueColumnNums: [0] + bigTableKeyColumns: 0:tinyint + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:tinyint className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0] + projectedOutput: 0:tinyint + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0 input vertices: 1 Map 2 @@ -503,10 +505,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: tinyint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:tinyint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 15 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -709,13 +710,14 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [2] - bigTableRetainedColumnNums: [0] - bigTableValueColumnNums: [0] + bigTableKeyColumns: 2:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:tinyint className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS 
true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0] + projectedOutput: 0:tinyint + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0 input vertices: 1 Map 3 @@ -727,13 +729,14 @@ STAGE PLANS: 0 _col0 (type: tinyint) 1 _col0 (type: tinyint) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0] - bigTableValueColumnNums: [0] + bigTableKeyColumns: 0:tinyint + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:tinyint className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0] + projectedOutput: 0:tinyint + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0 input vertices: 1 Map 4 @@ -754,10 +757,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1] + valueColumns: 0:bigint, 1:bigint Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized, llap @@ -799,10 +801,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [2] + keyColumns: 2:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 15 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -843,10 +844,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: tinyint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:tinyint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 15 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs diff --git ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out index a841d4c..8bf4885 100644 --- ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_join2.q.out @@ -284,13 +284,14 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [2] - bigTableRetainedColumnNums: [3] - bigTableValueColumnNums: [3] + bigTableKeyColumns: 2:int + 
bigTableRetainColumnNums: [3] + bigTableValueColumns: 3:bigint className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [3] + projectedOutput: 3:bigint + hashTableImplementationType: OPTIMIZED outputColumnNames: _col1 input vertices: 1 Map 3 @@ -302,13 +303,14 @@ STAGE PLANS: 0 _col1 (type: bigint) 1 _col0 (type: bigint) Map Join Vectorization: - bigTableKeyColumnNums: [3] - bigTableRetainedColumnNums: [3] - bigTableValueColumnNums: [3] + bigTableKeyColumns: 3:bigint + bigTableRetainColumnNums: [3] + bigTableValueColumns: 3:bigint className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [3] + projectedOutput: 3:bigint + hashTableImplementationType: OPTIMIZED outputColumnNames: _col1 input vertices: 1 Map 4 @@ -329,10 +331,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1] + valueColumns: 0:bigint, 1:bigint Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized, llap @@ -374,10 +375,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [2] + keyColumns: 2:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 20 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -418,10 +418,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [3] + keyColumns: 3:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 20 Data size: 88 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs diff --git ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out index 839952f..ba37846 100644 --- 
ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out +++ ql/src/test/results/clientpositive/llap/vector_outer_reference_windowed.q.out @@ -296,10 +296,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:decimal(25,2) Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: decimal(25,2)) Execution mode: vectorized, llap @@ -354,11 +353,11 @@ STAGE PLANS: Map-reduce partition columns: 0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [1] + keyColumns: 1:int keyExpressions: ConstantVectorExpression(val 0) -> 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:decimal(25,2) Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: decimal(25,2)) Reducer 3 @@ -518,10 +517,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1] + keyColumns: 0:decimal(15,2), 1:decimal(15,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2] + valueColumns: 2:decimal(25,2) Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(25,2)) Execution mode: vectorized, llap @@ -578,11 +577,11 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: decimal(15,2)) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:decimal(15,2), 0:decimal(15,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [1] - valueColumnNums: [2] + partitionColumns: 1:decimal(15,2) + valueColumns: 2:decimal(25,2) Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(25,2)) Reducer 3 @@ -748,10 +747,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(15,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(15,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:decimal(15,2) Statistics: Num 
rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(15,2)) Execution mode: vectorized, llap @@ -801,10 +800,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(15,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(15,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -847,6 +845,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(25,2)) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -883,11 +884,11 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: decimal(15,2)) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:decimal(15,2), 0:decimal(15,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [1] - valueColumnNums: [2] + partitionColumns: 1:decimal(15,2) + valueColumns: 2:decimal(25,2) Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(25,2)) Reducer 4 @@ -1057,10 +1058,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(15,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(15,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:decimal(15,2) Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(15,2)) Execution mode: vectorized, llap @@ -1110,10 +1111,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(15,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(15,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -1156,6 +1156,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(15,2)), _col1 (type: decimal(15,2)) Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: 
decimal(25,2)) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -1192,11 +1195,11 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: decimal(15,2)) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:decimal(15,2), 0:decimal(15,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [1] - valueColumnNums: [2] + partitionColumns: 1:decimal(15,2) + valueColumns: 2:decimal(25,2) Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(25,2)) Reducer 4 @@ -1365,10 +1368,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(15,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(15,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:decimal(15,2) Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(15,2)) Execution mode: vectorized, llap @@ -1418,10 +1421,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(15,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(15,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:decimal(15,2) Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(15,2)) Execution mode: vectorized, llap @@ -1465,6 +1468,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(15,2)) Statistics: Num rows: 2 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: struct) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: llap Reduce Vectorization: @@ -1600,10 +1606,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:decimal(17,2) Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: decimal(17,2)) Execution mode: vectorized, llap @@ -1658,11 +1663,11 @@ STAGE PLANS: Map-reduce partition columns: 0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [1] + keyColumns: 1:int 
keyExpressions: ConstantVectorExpression(val 0) -> 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:decimal(17,2) Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: decimal(17,2)) Reducer 3 @@ -1822,10 +1827,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(7,2)), _col1 (type: decimal(7,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1] + keyColumns: 0:decimal(7,2), 1:decimal(7,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2] + valueColumns: 2:decimal(17,2) Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(17,2)) Execution mode: vectorized, llap @@ -1882,11 +1887,11 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: decimal(7,2)) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:decimal(7,2), 0:decimal(7,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [1] - valueColumnNums: [2] + partitionColumns: 1:decimal(7,2) + valueColumns: 2:decimal(17,2) Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(17,2)) Reducer 3 @@ -2052,10 +2057,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(7,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(7,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:decimal(7,2) Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(7,2)) Execution mode: vectorized, llap @@ -2105,10 +2110,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(7,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(7,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -2151,6 +2155,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(7,2)), _col1 (type: decimal(7,2)) Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column 
stats: COMPLETE value expressions: _col2 (type: decimal(17,2)) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -2187,11 +2194,11 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: decimal(7,2)) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:decimal(7,2), 0:decimal(7,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [1] - valueColumnNums: [2] + partitionColumns: 1:decimal(7,2) + valueColumns: 2:decimal(17,2) Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(17,2)) Reducer 4 @@ -2361,10 +2368,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(7,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(7,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:decimal(7,2) Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(7,2)) Execution mode: vectorized, llap @@ -2414,10 +2421,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(7,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(7,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 4 Data size: 448 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -2460,6 +2466,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(7,2)), _col1 (type: decimal(7,2)) Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(17,2)) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -2496,11 +2505,11 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: decimal(7,2)) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:decimal(7,2), 0:decimal(7,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [1] - valueColumnNums: [2] + partitionColumns: 1:decimal(7,2) + valueColumns: 2:decimal(17,2) Statistics: Num rows: 2 Data size: 672 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: decimal(17,2)) Reducer 4 @@ -2669,10 +2678,10 @@ STAGE 
PLANS: Map-reduce partition columns: _col0 (type: decimal(7,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(7,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:decimal(7,2) Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(7,2)) Execution mode: vectorized, llap @@ -2722,10 +2731,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(7,2)) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0] + keyColumns: 0:decimal(7,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:decimal(7,2) Statistics: Num rows: 4 Data size: 896 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: decimal(7,2)) Execution mode: vectorized, llap @@ -2769,6 +2778,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: decimal(7,2)) Statistics: Num rows: 2 Data size: 704 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: struct) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: llap Reduce Vectorization: diff --git ql/src/test/results/clientpositive/llap/vector_ptf_1.q.out ql/src/test/results/clientpositive/llap/vector_ptf_1.q.out index 8d9ffb8..3654116 100644 --- ql/src/test/results/clientpositive/llap/vector_ptf_1.q.out +++ ql/src/test/results/clientpositive/llap/vector_ptf_1.q.out @@ -98,11 +98,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [2] + partitionColumns: 0:string + valueColumns: 2:struct Statistics: Num rows: 100 Data size: 18816 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: struct) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out index 9aeb650..23f7eca 100644 --- ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out +++ ql/src/test/results/clientpositive/llap/vector_ptf_part_simple.q.out @@ -142,10 +142,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS 
true - valueColumnNums: [1, 2] + valueColumns: 1:string, 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_name (type: string), p_retailprice (type: double) Execution mode: vectorized, llap @@ -402,10 +402,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2] + valueColumns: 1:string, 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_name (type: string), p_retailprice (type: double) Execution mode: vectorized, llap @@ -631,10 +631,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2] + valueColumns: 1:string, 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_name (type: string), p_retailprice (type: double) Execution mode: vectorized, llap @@ -860,11 +860,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [2] + partitionColumns: 0:string + valueColumns: 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_retailprice (type: double) Execution mode: vectorized, llap @@ -1122,11 +1122,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [2] + partitionColumns: 0:string + valueColumns: 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_retailprice (type: double) Execution mode: vectorized, llap @@ -1352,11 +1352,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] 
IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [2] + partitionColumns: 0:string + valueColumns: 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_retailprice (type: double) Execution mode: vectorized, llap @@ -1582,12 +1582,12 @@ STAGE PLANS: Map-reduce partition columns: 0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4, 1] + keyColumns: 4:int, 1:string keyExpressions: ConstantVectorExpression(val 0) -> 4:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [5] - valueColumnNums: [0, 2] + partitionColumns: 5:int + valueColumns: 0:string, 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_mfgr (type: string), p_retailprice (type: double) Execution mode: vectorized, llap @@ -1845,12 +1845,12 @@ STAGE PLANS: Map-reduce partition columns: 0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4, 1] + keyColumns: 4:int, 1:string keyExpressions: ConstantVectorExpression(val 0) -> 4:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [5] - valueColumnNums: [0, 2] + partitionColumns: 5:int + valueColumns: 0:string, 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_mfgr (type: string), p_retailprice (type: double) Execution mode: vectorized, llap @@ -2076,12 +2076,12 @@ STAGE PLANS: Map-reduce partition columns: 0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4, 1] + keyColumns: 4:int, 1:string keyExpressions: ConstantVectorExpression(val 0) -> 4:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [5] - valueColumnNums: [0, 2] + partitionColumns: 5:int + valueColumns: 0:string, 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_mfgr (type: string), p_retailprice (type: double) Execution mode: vectorized, llap @@ -2301,10 +2301,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2] + valueColumns: 1:string, 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_name 
(type: string), p_retailprice (type: double) Execution mode: vectorized, llap @@ -2529,10 +2529,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2] + valueColumns: 1:string, 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_name (type: string), p_retailprice (type: double) Execution mode: vectorized, llap @@ -2757,10 +2757,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2] + valueColumns: 1:string, 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_name (type: string), p_retailprice (type: double) Execution mode: vectorized, llap @@ -2954,11 +2954,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [2] + partitionColumns: 0:string + valueColumns: 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_retailprice (type: double) Execution mode: vectorized, llap @@ -3184,11 +3184,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [2] + partitionColumns: 0:string + valueColumns: 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_retailprice (type: double) Execution mode: vectorized, llap @@ -3414,11 +3414,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - 
valueColumnNums: [2] + partitionColumns: 0:string + valueColumns: 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_retailprice (type: double) Execution mode: vectorized, llap @@ -3612,12 +3612,12 @@ STAGE PLANS: Map-reduce partition columns: 0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4, 1] + keyColumns: 4:int, 1:string keyExpressions: ConstantVectorExpression(val 0) -> 4:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [5] - valueColumnNums: [0, 2] + partitionColumns: 5:int + valueColumns: 0:string, 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_mfgr (type: string), p_retailprice (type: double) Execution mode: vectorized, llap @@ -3843,12 +3843,12 @@ STAGE PLANS: Map-reduce partition columns: 0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4, 1] + keyColumns: 4:int, 1:string keyExpressions: ConstantVectorExpression(val 0) -> 4:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [5] - valueColumnNums: [0, 2] + partitionColumns: 5:int + valueColumns: 0:string, 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_mfgr (type: string), p_retailprice (type: double) Execution mode: vectorized, llap @@ -4074,12 +4074,12 @@ STAGE PLANS: Map-reduce partition columns: 0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4, 1] + keyColumns: 4:int, 1:string keyExpressions: ConstantVectorExpression(val 0) -> 4:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [5] - valueColumnNums: [0, 2] + partitionColumns: 5:int + valueColumns: 0:string, 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_mfgr (type: string), p_retailprice (type: double) Execution mode: vectorized, llap @@ -4315,10 +4315,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2] + valueColumns: 1:string, 2:decimal(38,18) Statistics: Num rows: 40 Data size: 12944 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_name (type: string), p_retailprice (type: decimal(38,18)) Execution mode: vectorized, llap @@ -4543,11 +4543,11 @@ STAGE PLANS: Map-reduce partition columns: 
p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [2] + partitionColumns: 0:string + valueColumns: 2:decimal(38,18) Statistics: Num rows: 40 Data size: 12944 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_retailprice (type: decimal(38,18)) Execution mode: vectorized, llap @@ -4793,10 +4793,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2] + valueColumns: 1:string, 2:bigint Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_name (type: string), p_bigint (type: bigint) Execution mode: vectorized, llap @@ -5021,11 +5021,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [2] + partitionColumns: 0:string + valueColumns: 2:bigint Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_bigint (type: bigint) Execution mode: vectorized, llap @@ -5245,10 +5245,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2] + valueColumns: 2:double Statistics: Num rows: 40 Data size: 4216 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_retailprice (type: double) Execution mode: vectorized, llap @@ -5444,11 +5444,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [2] + partitionColumns: 0:string + valueColumns: 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: 
p_retailprice (type: double) Execution mode: vectorized, llap @@ -5645,11 +5645,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), CASE WHEN ((p_mfgr = 'Manufacturer#2')) THEN (TIMESTAMP'2000-01-01 00:00:00.0') ELSE (CAST( null AS TIMESTAMP)) END (type: timestamp) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 6] + keyColumns: 0:string, 6:timestamp keyExpressions: IfExprColumnNull(col 4:boolean, col 5:timestamp, null)(children: StringGroupColEqualStringScalar(col 0:string, val Manufacturer#2) -> 4:boolean, ConstantVectorExpression(val 2000-01-01 00:00:00.0) -> 5:timestamp) -> 6:timestamp native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2] + valueColumns: 1:string, 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_name (type: string), p_retailprice (type: double) Execution mode: vectorized, llap @@ -5814,12 +5814,12 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), CASE WHEN ((p_mfgr = 'Manufacturer#2')) THEN (TIMESTAMP'2000-01-01 00:00:00.0') ELSE (CAST( null AS TIMESTAMP)) END (type: timestamp) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 6, 1] + keyColumns: 0:string, 6:timestamp, 1:string keyExpressions: IfExprColumnNull(col 4:boolean, col 5:timestamp, null)(children: StringGroupColEqualStringScalar(col 0:string, val Manufacturer#2) -> 4:boolean, ConstantVectorExpression(val 2000-01-01 00:00:00.0) -> 5:timestamp) -> 6:timestamp native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 9] - valueColumnNums: [2] + partitionColumns: 0:string, 9:timestamp + valueColumns: 2:double Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_retailprice (type: double) Execution mode: vectorized, llap @@ -6146,10 +6146,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2] + valueColumns: 2:double Statistics: Num rows: 40 Data size: 4216 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_retailprice (type: double) Execution mode: vectorized, llap @@ -6345,11 +6345,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - 
valueColumnNums: [2]
+     partitionColumns: 0:string
+     valueColumns: 2:double
  Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE
  value expressions: p_retailprice (type: double)
  Execution mode: vectorized, llap
@@ -6546,12 +6546,12 @@ STAGE PLANS:
  Map-reduce partition columns: p_mfgr (type: string), CASE WHEN ((p_mfgr = 'Manufacturer#2')) THEN (TIMESTAMP'2000-01-01 00:00:00.0') ELSE (CAST( null AS TIMESTAMP)) END (type: timestamp)
  Reduce Sink Vectorization:
      className: VectorReduceSinkObjectHashOperator
-     keyColumnNums: [0, 6, 1]
+     keyColumns: 0:string, 6:timestamp, 1:string
      keyExpressions: IfExprColumnNull(col 4:boolean, col 5:timestamp, null)(children: StringGroupColEqualStringScalar(col 0:string, val Manufacturer#2) -> 4:boolean, ConstantVectorExpression(val 2000-01-01 00:00:00.0) -> 5:timestamp) -> 6:timestamp
      native: true
      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-     partitionColumnNums: [0, 9]
-     valueColumnNums: [2]
+     partitionColumns: 0:string, 9:timestamp
+     valueColumns: 2:double
  Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE
  value expressions: p_retailprice (type: double)
  Execution mode: vectorized, llap
@@ -6748,11 +6748,11 @@ STAGE PLANS:
  Map-reduce partition columns: p_mfgr (type: string), CASE WHEN ((p_mfgr = 'Manufacturer#2')) THEN (TIMESTAMP'2000-01-01 00:00:00.0') ELSE (CAST( null AS TIMESTAMP)) END (type: timestamp)
  Reduce Sink Vectorization:
      className: VectorReduceSinkMultiKeyOperator
-     keyColumnNums: [0, 6]
+     keyColumns: 0:string, 6:timestamp
      keyExpressions: IfExprColumnNull(col 4:boolean, col 5:timestamp, null)(children: StringGroupColEqualStringScalar(col 0:string, val Manufacturer#2) -> 4:boolean, ConstantVectorExpression(val 2000-01-01 00:00:00.0) -> 5:timestamp) -> 6:timestamp
      native: true
      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-     valueColumnNums: [1, 2]
+     valueColumns: 1:string, 2:double
  Statistics: Num rows: 40 Data size: 9096 Basic stats: COMPLETE Column stats: COMPLETE
  value expressions: p_name (type: string), p_retailprice (type: double)
  Execution mode: vectorized, llap
diff --git ql/src/test/results/clientpositive/llap/vector_reduce_groupby_duplicate_cols.q.out ql/src/test/results/clientpositive/llap/vector_reduce_groupby_duplicate_cols.q.out
index bd42ed2..5894fa0 100644
--- ql/src/test/results/clientpositive/llap/vector_reduce_groupby_duplicate_cols.q.out
+++ ql/src/test/results/clientpositive/llap/vector_reduce_groupby_duplicate_cols.q.out
@@ -87,13 +87,15 @@ STAGE PLANS:
  0 one (type: int), two (type: int)
  1 1 (type: int), 2 (type: int)
  Map Join Vectorization:
-     bigTableKeyColumnNums: [0, 1]
-     bigTableRetainedColumnNums: [0, 1]
-     bigTableValueColumnNums: [0, 1]
+     bigTableKeyColumns: 0:int, 1:int
+     bigTableRetainColumnNums: [0, 1]
+     bigTableValueColumns: 0:int, 1:int
      className: VectorMapJoinInnerBigOnlyMultiKeyOperator
      native: true
      nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
-     projectedOutputColumnNums: [0, 1]
+     nonOuterSmallTableKeyMapping: []
+     projectedOutput: 0:int, 1:int
+     hashTableImplementationType: OPTIMIZED
  outputColumnNames: _col0, _col1
  input vertices:
      1 Map 1
diff --git ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out
index 9eaf293..9e1493c 100644
--- ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out
+++ ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out
@@ -127,10 +127,9 @@ STAGE PLANS:
  sort order:
  Reduce Sink Vectorization:
      className: VectorReduceSinkEmptyKeyOperator
-     keyColumnNums: []
      native: true
      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-     valueColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+     valueColumns: 0:struct, 1:double, 2:struct, 3:struct, 4:struct, 5:struct, 6:struct, 7:tinyint, 8:bigint
  Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE
  value expressions: _col0 (type: struct), _col1 (type: double), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: tinyint), _col8 (type: bigint)
  Execution mode: vectorized, llap
@@ -310,10 +309,9 @@ STAGE PLANS:
  sort order:
  Reduce Sink Vectorization:
      className: VectorReduceSinkEmptyKeyOperator
-     keyColumnNums: []
      native: true
      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-     valueColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8]
+     valueColumns: 0:struct, 1:double, 2:struct, 3:struct, 4:struct, 5:struct, 6:struct, 7:tinyint, 8:bigint
  Statistics: Num rows: 1 Data size: 492 Basic stats: COMPLETE Column stats: COMPLETE
  value expressions: _col0 (type: struct), _col1 (type: double), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: tinyint), _col8 (type: bigint)
  Execution mode: vectorized, llap
diff --git ql/src/test/results/clientpositive/llap/vector_udf1.q.out ql/src/test/results/clientpositive/llap/vector_udf1.q.out
index 9859824..8a388b8 100644
--- ql/src/test/results/clientpositive/llap/vector_udf1.q.out
+++ ql/src/test/results/clientpositive/llap/vector_udf1.q.out
@@ -2800,10 +2800,9 @@ STAGE PLANS:
  sort order:
  Reduce Sink Vectorization:
      className: VectorReduceSinkEmptyKeyOperator
-     keyColumnNums: []
      native: true
      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-     valueColumnNums: [0, 1]
+     valueColumns: 0:string, 1:varchar(20)
  Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
  value expressions: _col0 (type: string), _col1 (type: varchar(20))
  Execution mode: vectorized, llap
@@ -2943,10 +2942,9 @@ STAGE PLANS:
  sort order:
  Reduce Sink Vectorization:
      className: VectorReduceSinkEmptyKeyOperator
-     keyColumnNums: []
      native: true
      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-     valueColumnNums: [0, 1]
+     valueColumns: 0:string, 1:varchar(20)
  Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
  value expressions: _col0 (type: string), _col1 (type: varchar(20))
  Execution mode: vectorized, llap
diff --git ql/src/test/results/clientpositive/llap/vector_windowing.q.out ql/src/test/results/clientpositive/llap/vector_windowing.q.out
index a821265..b7abf78 100644
--- ql/src/test/results/clientpositive/llap/vector_windowing.q.out
+++ ql/src/test/results/clientpositive/llap/vector_windowing.q.out
@@ -43,11 +43,11 @@ STAGE PLANS:
  Map-reduce partition columns: p_mfgr (type: string)
  Reduce Sink Vectorization:
      className: VectorReduceSinkObjectHashOperator
-     keyColumnNums: [2, 1]
+     keyColumns: 2:string, 1:string
      native: true
      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-     partitionColumnNums: [2]
-     valueColumnNums: [5, 7]
+     partitionColumns: 2:string
+     valueColumns: 5:int, 7:double
  Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
  value expressions: p_size (type: int), p_retailprice (type: double)
  Execution mode: vectorized, llap
@@ -246,11 +246,11 @@ STAGE PLANS:
  Map-reduce partition columns: _col0 (type: string)
  Reduce Sink Vectorization:
      className: VectorReduceSinkObjectHashOperator
-     keyColumnNums: [0, 1, 2]
+     keyColumns: 0:string, 1:string, 2:int
      native: true
      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-     partitionColumnNums: [0]
-     valueColumnNums: [3]
+     partitionColumns: 0:string
+     valueColumns: 3:double
  Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE
  value expressions: _col3 (type: double)
  Execution mode: vectorized, llap
@@ -454,11 +454,11 @@ STAGE PLANS:
  Map-reduce partition columns: _col0 (type: string)
  Reduce Sink Vectorization:
      className: VectorReduceSinkObjectHashOperator
-     keyColumnNums: [0, 1, 2]
+     keyColumns: 0:string, 1:string, 2:int
      native: true
      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-     partitionColumnNums: [0]
-     valueColumnNums: [3]
+     partitionColumns: 0:string
+     valueColumns: 3:double
  Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE
  value expressions: _col3 (type: double)
  Execution mode: vectorized, llap
@@ -633,11 +633,11 @@ STAGE PLANS:
  Map-reduce partition columns: p_mfgr (type: string)
  Reduce Sink Vectorization:
      className: VectorReduceSinkObjectHashOperator
-     keyColumnNums: [2, 1]
+     keyColumns: 2:string, 1:string
      native: true
      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-     partitionColumnNums: [2]
-     valueColumnNums: [5]
+     partitionColumns: 2:string
+     valueColumns: 5:int
  Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
  value expressions: p_size (type: int)
  Execution mode: vectorized, llap
@@ -828,11 +828,11 @@ STAGE PLANS:
  Map-reduce partition columns: p_mfgr (type: string)
  Reduce Sink Vectorization:
      className: VectorReduceSinkObjectHashOperator
-     keyColumnNums: [2, 1]
+     keyColumns: 2:string, 1:string
      native: true
      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-     partitionColumnNums: [2]
-     valueColumnNums: [5, 7]
+     partitionColumns: 2:string
+     valueColumns: 5:int, 7:double
  Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
  value expressions: p_size (type: int), p_retailprice (type: double)
  Execution mode: vectorized, llap
@@ -1030,11 +1030,11 @@ STAGE PLANS:
  Map-reduce partition columns: p_mfgr (type: string)
  Reduce Sink Vectorization:
      className: VectorReduceSinkObjectHashOperator
-     keyColumnNums: [2, 1]
+     keyColumns: 2:string, 1:string
      native: true
      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-     partitionColumnNums: [2]
-     valueColumnNums: [5, 7]
+     partitionColumns: 2:string
+     valueColumns: 5:int, 7:double
  Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
  value expressions: p_size (type: int), p_retailprice (type: double)
  Execution mode: vectorized, llap
@@ -1238,11 +1238,11 @@ STAGE PLANS:
  Map-reduce partition columns: p_mfgr (type: string)
  Reduce Sink Vectorization:
      className: VectorReduceSinkObjectHashOperator
-     keyColumnNums: [2, 1]
+     keyColumns: 2:string, 1:string
      native: true
      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-     partitionColumnNums: [2]
-     valueColumnNums: [0, 5, 7]
+     partitionColumns: 2:string
+     valueColumns: 0:int, 5:int, 7:double
  Statistics: Num rows: 26 Data size: 6110 Basic stats: COMPLETE Column stats: COMPLETE
  value expressions: p_partkey (type: int), p_size (type: int), p_retailprice (type: double)
  Execution mode: vectorized, llap
@@ -1284,11 +1284,10 @@ STAGE PLANS:
  Map-reduce partition columns: p_partkey (type: int)
  Reduce Sink Vectorization:
      className: VectorReduceSinkObjectHashOperator
-     keyColumnNums: [0]
+     keyColumns: 0:int
      native: true
      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-     partitionColumnNums: [0]
-     valueColumnNums: []
+     partitionColumns: 0:int
  Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE
  Execution mode: vectorized, llap
  LLAP IO: no inputs
@@ -1360,6 +1359,9 @@ STAGE PLANS:
  Map-reduce partition columns: _col2 (type: string)
  Statistics: Num rows: 27 Data size: 6237 Basic stats: COMPLETE Column stats: COMPLETE
  value expressions: _col5 (type: int), _col7 (type: double)
+ MergeJoin Vectorization:
+     enabled: false
+     enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
  Reducer 4
  Execution mode: llap
  Reduce Vectorization:
@@ -1525,11 +1527,10 @@ STAGE PLANS:
  Map-reduce partition columns: p_mfgr (type: string)
  Reduce Sink Vectorization:
      className: VectorReduceSinkObjectHashOperator
-     keyColumnNums: [2, 1, 5]
+     keyColumns: 2:string, 1:string, 5:int
      native: true
      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-     partitionColumnNums: [2]
-     valueColumnNums: []
+     partitionColumns: 2:string
  Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
  Execution mode: vectorized, llap
  LLAP IO: no inputs
@@ -1684,11 +1685,11 @@ STAGE PLANS:
  Map-reduce partition columns: p_mfgr (type: string)
  Reduce Sink Vectorization:
      className: VectorReduceSinkObjectHashOperator
-     keyColumnNums: [2, 1]
+     keyColumns: 2:string, 1:string
      native: true
      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-     partitionColumnNums: [2]
-     valueColumnNums: [5, 7]
+     partitionColumns: 2:string
+     valueColumns: 5:int, 7:double
  Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
  value expressions: p_size (type: int), p_retailprice (type: double)
  Execution mode: vectorized, llap
@@ -1861,11 +1862,11 @@ STAGE PLANS:
  Map-reduce partition columns: p_mfgr (type: string)
  Reduce Sink Vectorization:
      className: VectorReduceSinkObjectHashOperator
-     keyColumnNums: [2, 1]
+     keyColumns: 2:string, 1:string
      native: true
      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-     partitionColumnNums: [2]
-     valueColumnNums: [5, 7]
+     partitionColumns: 2:string
+     valueColumns: 5:int, 7:double
  Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE
  value expressions: p_size (type: int), p_retailprice (type: double)
  Execution mode: vectorized, llap
@@ -2040,11 +2041,11 @@ STAGE PLANS:
  Map-reduce partition columns: p_mfgr (type: string)
  Reduce Sink Vectorization:
      className: VectorReduceSinkObjectHashOperator
-     keyColumnNums: [2, 1]
+     keyColumns: 2:string, 1:string
      native: true
      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-     partitionColumnNums: [2]
-     valueColumnNums: [5]
+     partitionColumns: 2:string
+     valueColumns: 5:int
  Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE
  value expressions: p_size (type: int)
  Execution mode: vectorized, llap
@@ -2230,12 +2231,12 @@ STAGE PLANS:
  Map-reduce partition columns: 'Manufacturer#3' (type: string)
  Reduce Sink Vectorization:
      className: VectorReduceSinkObjectHashOperator
-     keyColumnNums: [10, 1]
+     keyColumns: 10:string, 1:string
      keyExpressions: ConstantVectorExpression(val Manufacturer#3) -> 10:string
      native: true
      nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true,
hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [11] - valueColumnNums: [5] + partitionColumns: 11:string + valueColumns: 5:int Statistics: Num rows: 5 Data size: 1115 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -2398,11 +2399,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -2565,11 +2566,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -2785,11 +2786,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -3077,11 +3078,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -3356,11 +3357,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: 
VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -3458,11 +3459,11 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [3, 4, 2] + partitionColumns: 0:string + valueColumns: 3:bigint, 4:bigint, 2:int Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE value expressions: count_window_0 (type: bigint), count_window_1 (type: bigint), _col5 (type: int) Reducer 3 @@ -3609,11 +3610,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -3823,11 +3824,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string), _col2 (type: int), _col3 (type: double) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2, 3] + keyColumns: 0:string, 1:string, 2:int, 3:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0, 1, 2, 3] - valueColumnNums: [4, 5] + partitionColumns: 0:string, 1:string, 2:int, 3:double + valueColumns: 4:double, 5:double Statistics: Num rows: 13 Data size: 3211 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col4 (type: double), _col5 (type: double) Execution mode: vectorized, llap @@ -3892,11 +3893,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 0] + keyColumns: 1:string, 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys 
IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [1] - valueColumnNums: [2, 3, 4, 5] + partitionColumns: 1:string + valueColumns: 2:int, 3:double, 4:double, 5:double Statistics: Num rows: 13 Data size: 3211 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: int), _col3 (type: double), _col4 (type: double), _col5 (type: double) Reducer 3 @@ -4056,11 +4057,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -4259,11 +4260,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [0, 5, 7] + partitionColumns: 2:string + valueColumns: 0:int, 5:int, 7:double Statistics: Num rows: 26 Data size: 6110 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_partkey (type: int), p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -4527,11 +4528,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [2] + partitionColumns: 0:string + valueColumns: 2:double Statistics: Num rows: 13 Data size: 2574 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: double) Execution mode: vectorized, llap @@ -4809,11 +4810,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [3, 7] + partitionColumns: 2:string + valueColumns: 3:string, 7:double Statistics: Num rows: 26 Data size: 8294 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_brand (type: string), p_retailprice (type: double) Execution mode: vectorized, llap @@ -5298,11 +5299,11 @@ 
STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Reduce Output Operator @@ -5311,11 +5312,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Reduce Output Operator @@ -5324,11 +5325,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -5713,11 +5714,11 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [3, 4, 2] + partitionColumns: 0:string + valueColumns: 3:bigint, 4:bigint, 2:int Statistics: Num rows: 26 Data size: 12766 Basic stats: COMPLETE Column stats: COMPLETE value expressions: count_window_0 (type: bigint), count_window_1 (type: bigint), _col5 (type: int) Reducer 9 @@ -6081,11 +6082,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:string, 1:string, 2:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: 
[0] - valueColumnNums: [3] + partitionColumns: 0:string + valueColumns: 3:double Statistics: Num rows: 13 Data size: 3003 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: double) Execution mode: vectorized, llap @@ -6264,11 +6265,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 5] + keyColumns: 2:string, 5:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [1] + partitionColumns: 2:string + valueColumns: 1:string Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_name (type: string) Execution mode: vectorized, llap @@ -6429,11 +6430,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -6586,11 +6587,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -6749,11 +6750,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -6922,11 +6923,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN 
[tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -7089,11 +7090,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -7266,11 +7267,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -7447,11 +7448,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -7627,11 +7628,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -7825,11 +7826,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: 
[2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -8021,11 +8022,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2] + keyColumns: 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [7] + partitionColumns: 2:string + valueColumns: 7:double Statistics: Num rows: 26 Data size: 2756 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_retailprice (type: double) Execution mode: vectorized, llap @@ -8245,11 +8246,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2, 1] - valueColumnNums: [5, 7] + partitionColumns: 2:string, 1:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -8485,12 +8486,12 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 10] + keyColumns: 2:string, 10:string keyExpressions: StringSubstrColStart(col 4:string, start 1) -> 10:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [4] + partitionColumns: 2:string + valueColumns: 4:string Statistics: Num rows: 26 Data size: 5252 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_type (type: string) Execution mode: vectorized, llap @@ -8675,11 +8676,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double 
Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -8830,11 +8831,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 5] + keyColumns: 2:string, 5:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [1, 7] + partitionColumns: 2:string + valueColumns: 1:string, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_name (type: string), p_retailprice (type: double) Execution mode: vectorized, llap @@ -9018,11 +9019,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -9173,11 +9174,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 5] + keyColumns: 2:string, 5:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [1, 7] + partitionColumns: 2:string + valueColumns: 1:string, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_name (type: string), p_retailprice (type: double) Execution mode: vectorized, llap @@ -9331,12 +9332,12 @@ STAGE PLANS: Map-reduce partition columns: 0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [10] + keyColumns: 10:int keyExpressions: ConstantVectorExpression(val 0) -> 10:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [11] - valueColumnNums: [1, 7] + partitionColumns: 11:int + valueColumns: 1:string, 7:double Statistics: Num rows: 26 Data size: 3354 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_name (type: string), p_retailprice (type: double) Execution mode: vectorized, llap @@ -9427,10 +9428,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1] + keyColumns: 1:string native: 
true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2, 5] + valueColumns: 2:double, 5:double Statistics: Num rows: 26 Data size: 3562 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: double), _col2 (type: double) Reducer 3 @@ -9565,12 +9566,11 @@ STAGE PLANS: Map-reduce partition columns: 'Manufacturer#6' (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [10, 5] + keyColumns: 10:string, 5:int keyExpressions: ConstantVectorExpression(val Manufacturer#6) -> 10:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [11] - valueColumnNums: [] + partitionColumns: 11:string Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -9705,12 +9705,12 @@ STAGE PLANS: Map-reduce partition columns: 'Manufacturer#1' (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [10, 1] + keyColumns: 10:string, 1:string keyExpressions: ConstantVectorExpression(val Manufacturer#1) -> 10:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [11] - valueColumnNums: [7] + partitionColumns: 11:string + valueColumns: 7:double Statistics: Num rows: 5 Data size: 1135 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_retailprice (type: double) Execution mode: vectorized, llap @@ -9854,12 +9854,12 @@ STAGE PLANS: Map-reduce partition columns: 'm1' (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [10] + keyColumns: 10:string keyExpressions: ConstantVectorExpression(val m1) -> 10:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [11] - valueColumnNums: [5] + partitionColumns: 11:string + valueColumns: 5:int Statistics: Num rows: 5 Data size: 510 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_windowing_expressions.q.out ql/src/test/results/clientpositive/llap/vector_windowing_expressions.q.out index 401a73e..663baa8 100644 --- ql/src/test/results/clientpositive/llap/vector_windowing_expressions.q.out +++ ql/src/test/results/clientpositive/llap/vector_windowing_expressions.q.out @@ -89,11 +89,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 7] + keyColumns: 2:string, 7:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 2860 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -283,11 +283,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 7] + keyColumns: 2:string, 7:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 2860 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -445,11 +445,10 @@ STAGE PLANS: Map-reduce partition columns: t (type: tinyint) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 6, 7, 1, 4] + keyColumns: 0:tinyint, 6:boolean, 7:string, 1:smallint, 4:float native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [] + partitionColumns: 0:tinyint Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs @@ -669,11 +668,10 @@ STAGE PLANS: Map-reduce partition columns: si (type: smallint) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 2, 7] + keyColumns: 1:smallint, 2:int, 7:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [1] - valueColumnNums: [] + partitionColumns: 1:smallint Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs @@ -893,11 +891,10 @@ STAGE PLANS: Map-reduce partition columns: b (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [3, 1, 7, 5] + keyColumns: 3:bigint, 1:smallint, 7:string, 5:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [3] - valueColumnNums: [] + partitionColumns: 3:bigint Statistics: Num rows: 1 Data size: 204 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs @@ -1117,11 +1114,11 @@ STAGE PLANS: Map-reduce partition columns: f (type: float) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4, 3] + keyColumns: 4:float, 3:bigint native: true 
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [4] - valueColumnNums: [7] + partitionColumns: 4:float + valueColumns: 7:string Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE value expressions: s (type: string) Execution mode: vectorized, llap @@ -1342,10 +1339,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), p_type (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [2, 4] + keyColumns: 2:string, 4:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [7] + valueColumns: 7:double Statistics: Num rows: 26 Data size: 5460 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_retailprice (type: double) Execution mode: vectorized, llap @@ -1520,11 +1517,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 4] + keyColumns: 2:string, 4:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [7] + partitionColumns: 2:string + valueColumns: 7:double Statistics: Num rows: 26 Data size: 5460 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_retailprice (type: double) Execution mode: vectorized, llap @@ -1687,11 +1684,11 @@ STAGE PLANS: Map-reduce partition columns: ts (type: timestamp) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [8, 2] + keyColumns: 8:timestamp, 2:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [8] - valueColumnNums: [7] + partitionColumns: 8:timestamp + valueColumns: 7:string Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE value expressions: s (type: string) Execution mode: vectorized, llap @@ -1931,11 +1928,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 7] + keyColumns: 2:string, 7:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 2860 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_windowing_gby.q.out 
ql/src/test/results/clientpositive/llap/vector_windowing_gby.q.out index d87e96f..61b09e7 100644 --- ql/src/test/results/clientpositive/llap/vector_windowing_gby.q.out +++ ql/src/test/results/clientpositive/llap/vector_windowing_gby.q.out @@ -58,10 +58,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [1] + keyColumns: 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2, 4] + valueColumns: 2:int, 4:boolean Statistics: Num rows: 18 Data size: 1581 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: boolean) Execution mode: vectorized, llap @@ -111,10 +111,10 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [6] + keyColumns: 6:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2] + valueColumns: 2:int Statistics: Num rows: 9174 Data size: 671296 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: vectorized, llap @@ -157,6 +157,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: boolean) Statistics: Num rows: 3 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -201,12 +204,12 @@ STAGE PLANS: Map-reduce partition columns: 0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [3, 6] + keyColumns: 3:int, 6:double keyExpressions: ConstantVectorExpression(val 0) -> 3:int, DoubleColDivideDoubleColumn(col 4:double, col 5:double)(children: CastLongToDouble(col 1:bigint) -> 4:double, CastLongToDouble(col 2:bigint) -> 5:double) -> 6:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [7] - valueColumnNums: [1, 2] + partitionColumns: 7:int + valueColumns: 1:bigint, 2:bigint Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 4 diff --git ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out index 8dcb900..106322b 100644 --- ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out +++ ql/src/test/results/clientpositive/llap/vector_windowing_gby2.q.out @@ -62,10 +62,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1] + valueColumns: 1:bigint Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint) Execution mode: vectorized, llap @@ -130,12 +130,11 @@ STAGE PLANS: Map-reduce partition columns: 0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:int, 1:bigint keyExpressions: ConstantVectorExpression(val 0) -> 2:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [3] - valueColumnNums: [] + partitionColumns: 3:int Statistics: Num rows: 7 Data size: 651 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: vectorized, llap @@ -305,10 +304,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2] + valueColumns: 1:string, 2:bigint Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string), _col2 (type: bigint) Execution mode: vectorized, llap @@ -365,11 +364,11 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 2] + keyColumns: 1:string, 2:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [1] - valueColumnNums: [0] + partitionColumns: 1:string + valueColumns: 0:int Statistics: Num rows: 6 Data size: 1176 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Reducer 3 @@ -542,10 +541,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2, 3, 4, 5] + valueColumns: 2:double, 3:double, 4:int, 5:double Statistics: Num rows: 10 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: double), _col3 (type: double), _col4 (type: int), _col5 (type: double) Execution mode: vectorized, llap @@ -602,11 +601,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - 
keyColumnNums: [0, 2] + keyColumns: 0:string, 2:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [1, 3, 4, 5] + partitionColumns: 0:string + valueColumns: 1:string, 3:double, 4:int, 5:double Statistics: Num rows: 10 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string), _col3 (type: double), _col4 (type: int), _col5 (type: double) Reducer 3 @@ -681,12 +680,12 @@ STAGE PLANS: Map-reduce partition columns: lower(_col1) (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [7, 3] + keyColumns: 7:string, 3:double keyExpressions: StringLower(col 2:string) -> 7:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [8] - valueColumnNums: [6, 2, 4, 5] + partitionColumns: 8:string + valueColumns: 6:int, 2:string, 4:int, 5:double Statistics: Num rows: 10 Data size: 1980 Basic stats: COMPLETE Column stats: COMPLETE value expressions: rank_window_0 (type: int), _col1 (type: string), _col4 (type: int), _col5 (type: double) Reducer 4 @@ -761,11 +760,11 @@ STAGE PLANS: Map-reduce partition columns: _col5 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4, 5] + keyColumns: 4:int, 5:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [4] - valueColumnNums: [6, 2] + partitionColumns: 4:int + valueColumns: 6:int, 2:int Statistics: Num rows: 10 Data size: 1005 Basic stats: COMPLETE Column stats: COMPLETE value expressions: dense_rank_window_1 (type: int), _col0 (type: int) Reducer 5 @@ -901,10 +900,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [1] + keyColumns: 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2, 4] + valueColumns: 2:int, 4:boolean Statistics: Num rows: 18 Data size: 1581 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: boolean) Execution mode: vectorized, llap @@ -954,10 +953,10 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [6] + keyColumns: 6:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [2] + valueColumns: 2:int Statistics: Num rows: 9174 Data size: 671296 
Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: vectorized, llap @@ -1000,6 +999,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: boolean) Statistics: Num rows: 3 Data size: 60 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -1044,12 +1046,12 @@ STAGE PLANS: Map-reduce partition columns: 0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [3, 6] + keyColumns: 3:int, 6:double keyExpressions: ConstantVectorExpression(val 0) -> 3:int, DoubleColDivideDoubleColumn(col 4:double, col 5:double)(children: CastLongToDouble(col 1:bigint) -> 4:double, CastLongToDouble(col 2:bigint) -> 5:double) -> 6:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [7] - valueColumnNums: [1, 2] + partitionColumns: 7:int + valueColumns: 1:bigint, 2:bigint Statistics: Num rows: 2 Data size: 40 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Reducer 4 diff --git ql/src/test/results/clientpositive/llap/vector_windowing_multipartitioning.q.out ql/src/test/results/clientpositive/llap/vector_windowing_multipartitioning.q.out index 595b0a8..e52a76b 100644 --- ql/src/test/results/clientpositive/llap/vector_windowing_multipartitioning.q.out +++ ql/src/test/results/clientpositive/llap/vector_windowing_multipartitioning.q.out @@ -81,11 +81,11 @@ STAGE PLANS: Map-reduce partition columns: s (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [7, 1] + keyColumns: 7:string, 1:smallint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [7] - valueColumnNums: [3] + partitionColumns: 7:string + valueColumns: 3:bigint Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE value expressions: b (type: bigint) Execution mode: vectorized, llap @@ -10256,11 +10256,11 @@ STAGE PLANS: Map-reduce partition columns: s (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [7, 9] + keyColumns: 7:string, 9:decimal(4,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [7] - valueColumnNums: [3, 8] + partitionColumns: 7:string + valueColumns: 3:bigint, 8:timestamp Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE value expressions: b (type: bigint), ts (type: timestamp) Execution mode: vectorized, llap @@ -10353,11 +10353,11 @@ STAGE PLANS: Map-reduce partition columns: _col7 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: 
[0, 3] + keyColumns: 0:string, 3:timestamp native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [4, 2] + partitionColumns: 0:string + valueColumns: 4:int, 2:bigint Statistics: Num rows: 1 Data size: 344 Basic stats: COMPLETE Column stats: NONE value expressions: rank_window_0 (type: int), _col3 (type: bigint) Reducer 3 @@ -10534,10 +10534,10 @@ STAGE PLANS: Map-reduce partition columns: s (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [7] + keyColumns: 7:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 4] + valueColumns: 1:smallint, 2:int, 4:float Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE value expressions: si (type: smallint), i (type: int), f (type: float) Execution mode: vectorized, llap @@ -10628,10 +10628,10 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: smallint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [1] + keyColumns: 1:smallint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [4, 3, 0] + valueColumns: 4:bigint, 3:float, 0:string Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE value expressions: sum_window_0 (type: bigint), _col4 (type: float), _col7 (type: string) Reducer 3 @@ -10801,11 +10801,11 @@ STAGE PLANS: Map-reduce partition columns: s (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [7, 6] + keyColumns: 7:string, 6:boolean native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [7] - valueColumnNums: [1, 10] + partitionColumns: 7:string + valueColumns: 1:smallint, 10:binary Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE value expressions: si (type: smallint), bin (type: binary) Execution mode: vectorized, llap @@ -10898,11 +10898,11 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: smallint) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 3] + keyColumns: 2:smallint, 3:binary native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [4, 0] + partitionColumns: 2:smallint + valueColumns: 4:int, 0:string Statistics: Num rows: 1 Data size: 336 Basic stats: COMPLETE Column stats: NONE value expressions: rank_window_0 (type: int), _col7 (type: string) 
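Besides the column renames, several plans above gain an explicit MergeJoin Vectorization block reporting enabled: false with enableConditionsNotMet: Vectorizing MergeJoin Supported IS false. A hedged sketch of the met/not-met condition-reporting pattern that output format suggests (class and method names are assumptions, not Hive's real API):

import java.util.ArrayList;
import java.util.List;

// Sketch of condition bookkeeping behind "enabled" / "enableConditionsNotMet".
public class MergeJoinVectorizationSketch {
  private final List<String> conditionsMet = new ArrayList<>();
  private final List<String> conditionsNotMet = new ArrayList<>();

  void check(String description, boolean value) {
    // Each condition is reported as "<description> IS <value>" in the plan.
    (value ? conditionsMet : conditionsNotMet).add(description + " IS " + value);
  }

  public static void main(String[] args) {
    MergeJoinVectorizationSketch v = new MergeJoinVectorizationSketch();
    v.check("Vectorizing MergeJoin Supported", false);
    boolean enabled = v.conditionsNotMet.isEmpty();
    System.out.println("enabled: " + enabled);
    if (!enabled) {
      System.out.println("enableConditionsNotMet: "
          + String.join(", ", v.conditionsNotMet));
    }
  }
}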
Reducer 3 @@ -11074,10 +11074,10 @@ STAGE PLANS: Map-reduce partition columns: i (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [2] + keyColumns: 2:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [4, 7] + valueColumns: 4:float, 7:string Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE value expressions: f (type: float), s (type: string) Execution mode: vectorized, llap @@ -11168,12 +11168,12 @@ STAGE PLANS: Map-reduce partition columns: 0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4, 1] + keyColumns: 4:int, 1:float keyExpressions: ConstantVectorExpression(val 0) -> 4:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [5] - valueColumnNums: [3, 2] + partitionColumns: 5:int + valueColumns: 3:double, 2:string Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE value expressions: sum_window_0 (type: double), _col7 (type: string) Reducer 3 @@ -11354,11 +11354,11 @@ STAGE PLANS: Map-reduce partition columns: s (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [7, 9] + keyColumns: 7:string, 9:decimal(4,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [7] - valueColumnNums: [1, 4] + partitionColumns: 7:string + valueColumns: 1:smallint, 4:float Statistics: Num rows: 1 Data size: 304 Basic stats: COMPLETE Column stats: NONE value expressions: si (type: smallint), f (type: float) Execution mode: vectorized, llap @@ -11451,11 +11451,11 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: smallint) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 3] + keyColumns: 2:smallint, 3:float native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [4, 0] + partitionColumns: 2:smallint + valueColumns: 4:int, 0:string Statistics: Num rows: 1 Data size: 304 Basic stats: COMPLETE Column stats: NONE value expressions: rank_window_0 (type: int), _col7 (type: string) Reducer 3 diff --git ql/src/test/results/clientpositive/llap/vector_windowing_navfn.q.out ql/src/test/results/clientpositive/llap/vector_windowing_navfn.q.out index 9358281..ba58992 100644 --- ql/src/test/results/clientpositive/llap/vector_windowing_navfn.q.out +++ ql/src/test/results/clientpositive/llap/vector_windowing_navfn.q.out @@ -88,11 +88,10 @@ STAGE PLANS: Map-reduce partition columns: 0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [3] + keyColumns: 3:int 
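A second pattern recurs in these hunks: lines like valueColumnNums: [] and keyColumnNums: [] are removed with no replacement, so the new format simply omits a field when its column list is empty rather than printing an empty keyColumns: or valueColumns:. A sketch of that emit-only-if-non-empty rule, using a hypothetical helper rather than Hive's actual formatter:

// Sketch: a field is appended only when it carries at least one column.
public class OmitEmptyFieldSketch {
  static void appendIfNonEmpty(StringBuilder plan, String label, String annotated) {
    if (annotated != null && !annotated.isEmpty()) {
      plan.append("    ").append(label).append(": ").append(annotated).append('\n');
    }
    // Empty lists produce no line at all, matching hunks where
    // "- valueColumnNums: []" has no corresponding "+" line.
  }

  public static void main(String[] args) {
    StringBuilder plan = new StringBuilder();
    appendIfNonEmpty(plan, "keyColumns", "3:bigint");
    appendIfNonEmpty(plan, "valueColumns", ""); // empty -> omitted entirely
    System.out.print(plan);
  }
}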
keyExpressions: ConstantVectorExpression(val 0) -> 3:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 174 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs @@ -233,11 +232,11 @@ STAGE PLANS: Map-reduce partition columns: d (type: double) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [5, 9] + keyColumns: 5:double, 9:decimal(4,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [5] - valueColumnNums: [7] + partitionColumns: 5:double + valueColumns: 7:string Statistics: Num rows: 1 Data size: 304 Basic stats: COMPLETE Column stats: NONE value expressions: s (type: string) Execution mode: vectorized, llap @@ -492,11 +491,11 @@ STAGE PLANS: Map-reduce partition columns: bin (type: binary) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [10, 5, 2] + keyColumns: 10:binary, 5:double, 2:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [10] - valueColumnNums: [7] + partitionColumns: 10:binary + valueColumns: 7:string Statistics: Num rows: 1 Data size: 340 Basic stats: COMPLETE Column stats: NONE value expressions: s (type: string) Execution mode: vectorized, llap @@ -717,11 +716,10 @@ STAGE PLANS: Map-reduce partition columns: i (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 7, 9] + keyColumns: 2:int, 7:string, 9:decimal(4,2) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [] + partitionColumns: 2:int Statistics: Num rows: 1 Data size: 300 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs @@ -941,11 +939,11 @@ STAGE PLANS: Map-reduce partition columns: d (type: double) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [5, 4] + keyColumns: 5:double, 4:float native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [5] - valueColumnNums: [0, 7] + partitionColumns: 5:double + valueColumns: 0:tinyint, 7:string Statistics: Num rows: 1 Data size: 200 Basic stats: COMPLETE Column stats: NONE value expressions: t (type: tinyint), s (type: string) Execution mode: vectorized, llap @@ -1200,11 +1198,10 @@ STAGE PLANS: Map-reduce partition columns: bo (type: boolean) Reduce Sink Vectorization: className: 
VectorReduceSinkObjectHashOperator - keyColumnNums: [6, 7] + keyColumns: 6:boolean, 7:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [6] - valueColumnNums: [] + partitionColumns: 6:boolean Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs @@ -1432,12 +1429,12 @@ STAGE PLANS: Map-reduce partition columns: UDFToByte(10) (type: tinyint) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [12, 7] + keyColumns: 12:tinyint, 7:string keyExpressions: ConstantVectorExpression(val 10) -> 12:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [13] - valueColumnNums: [2] + partitionColumns: 13:tinyint + valueColumns: 2:int Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE value expressions: i (type: int) Execution mode: vectorized, llap @@ -1627,11 +1624,10 @@ STAGE PLANS: Map-reduce partition columns: a (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:int, 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [] + partitionColumns: 0:int Statistics: Num rows: 15 Data size: 120 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs @@ -1800,11 +1796,10 @@ STAGE PLANS: Map-reduce partition columns: a (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:int, 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [] + partitionColumns: 0:int Statistics: Num rows: 15 Data size: 120 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs @@ -1973,11 +1968,10 @@ STAGE PLANS: Map-reduce partition columns: a (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:int, 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [] + partitionColumns: 0:int Statistics: Num rows: 15 Data size: 120 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs @@ -2146,11 +2140,10 @@ STAGE PLANS: Map-reduce partition columns: a (type: int) Reduce Sink Vectorization: className: 
VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:int, 1:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [] + partitionColumns: 0:int Statistics: Num rows: 15 Data size: 120 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs diff --git ql/src/test/results/clientpositive/llap/vector_windowing_order_null.q.out ql/src/test/results/clientpositive/llap/vector_windowing_order_null.q.out index 458a55d..648f97d 100644 --- ql/src/test/results/clientpositive/llap/vector_windowing_order_null.q.out +++ ql/src/test/results/clientpositive/llap/vector_windowing_order_null.q.out @@ -89,11 +89,10 @@ STAGE PLANS: Map-reduce partition columns: i (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 7, 3] + keyColumns: 2:int, 7:string, 3:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [] + partitionColumns: 2:int Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs @@ -222,11 +221,10 @@ STAGE PLANS: Map-reduce partition columns: d (type: double) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [5, 7, 4] + keyColumns: 5:double, 7:string, 4:float native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [5] - valueColumnNums: [] + partitionColumns: 5:double Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs @@ -355,11 +353,11 @@ STAGE PLANS: Map-reduce partition columns: ts (type: timestamp) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [8, 4] + keyColumns: 8:timestamp, 4:float native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [8] - valueColumnNums: [7] + partitionColumns: 8:timestamp + valueColumns: 7:string Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE value expressions: s (type: string) Execution mode: vectorized, llap @@ -489,11 +487,10 @@ STAGE PLANS: Map-reduce partition columns: t (type: tinyint) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 7, 5] + keyColumns: 0:tinyint, 7:string, 5:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] 
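The typed annotation also clarifies the keyExpressions lines seen above and below: an expression such as CastStringGroupToChar(col 7:string, maxLength 12) -> 12:char(12) writes its result into scratch column 12, and the new keyColumns line then reports that scratch column together with its result type (12:char(12)) instead of a bare index. A small illustration of that pairing (ScratchColumn is a hypothetical stand-in, not a Hive class):

// Sketch: a scratch column produced by a key expression carries its own type,
// which the new keyColumns annotation surfaces directly.
public class ScratchColumnSketch {
  static final class ScratchColumn {
    final int index;
    final String typeName;

    ScratchColumn(int index, String typeName) {
      this.index = index;
      this.typeName = typeName;
    }

    String annotation() {
      return index + ":" + typeName; // e.g. "12:char(12)"
    }
  }

  public static void main(String[] args) {
    ScratchColumn castResult = new ScratchColumn(12, "char(12)");
    // Matches plans pairing a cast keyExpression with "keyColumns: 2:int, 12:char(12)".
    System.out.println("keyColumns: 2:int, " + castResult.annotation());
  }
}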
- valueColumnNums: [] + partitionColumns: 0:tinyint Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs @@ -622,11 +619,11 @@ STAGE PLANS: Map-reduce partition columns: ts (type: timestamp) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [8, 7] + keyColumns: 8:timestamp, 7:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [8] - valueColumnNums: [2] + partitionColumns: 8:timestamp + valueColumns: 2:int Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE value expressions: i (type: int) Execution mode: vectorized, llap @@ -792,11 +789,11 @@ STAGE PLANS: Map-reduce partition columns: s (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [7, 2] + keyColumns: 7:string, 2:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [7] - valueColumnNums: [5] + partitionColumns: 7:string + valueColumns: 5:double Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE value expressions: d (type: double) Execution mode: vectorized, llap @@ -957,11 +954,11 @@ STAGE PLANS: Map-reduce partition columns: s (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [7, 2] + keyColumns: 7:string, 2:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [7] - valueColumnNums: [5] + partitionColumns: 7:string + valueColumns: 5:double Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE value expressions: d (type: double) Execution mode: vectorized, llap @@ -1122,11 +1119,11 @@ STAGE PLANS: Map-reduce partition columns: s (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [7, 2] + keyColumns: 7:string, 2:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [7] - valueColumnNums: [5] + partitionColumns: 7:string + valueColumns: 5:double Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE value expressions: d (type: double) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_windowing_range_multiorder.q.out ql/src/test/results/clientpositive/llap/vector_windowing_range_multiorder.q.out index 256b80d..5ef540a 100644 --- ql/src/test/results/clientpositive/llap/vector_windowing_range_multiorder.q.out +++ ql/src/test/results/clientpositive/llap/vector_windowing_range_multiorder.q.out @@ -81,11 +81,11 @@ STAGE PLANS: Map-reduce partition columns: si (type: 
smallint) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 2, 3] + keyColumns: 1:smallint, 2:int, 3:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [1] - valueColumnNums: [0] + partitionColumns: 1:smallint + valueColumns: 0:tinyint Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE value expressions: t (type: tinyint) Execution mode: vectorized, llap @@ -340,11 +340,10 @@ STAGE PLANS: Map-reduce partition columns: si (type: smallint), bo (type: boolean) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 6, 2, 4] + keyColumns: 1:smallint, 6:boolean, 2:int, 4:float native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [1, 6] - valueColumnNums: [] + partitionColumns: 1:smallint, 6:boolean Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs @@ -563,11 +562,10 @@ STAGE PLANS: Map-reduce partition columns: si (type: smallint), bo (type: boolean) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 6, 2, 4] + keyColumns: 1:smallint, 6:boolean, 2:int, 4:float native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [1, 6] - valueColumnNums: [] + partitionColumns: 1:smallint, 6:boolean Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs @@ -786,10 +784,10 @@ STAGE PLANS: Map-reduce partition columns: s (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [7] + keyColumns: 7:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2] + valueColumns: 1:smallint, 2:int Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE value expressions: si (type: smallint), i (type: int) Execution mode: vectorized, llap @@ -10936,11 +10934,10 @@ STAGE PLANS: Map-reduce partition columns: s (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [7, 1, 2] + keyColumns: 7:string, 1:smallint, 2:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [7] - valueColumnNums: [] + partitionColumns: 7:string Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP 
IO: no inputs @@ -11194,11 +11191,10 @@ STAGE PLANS: Map-reduce partition columns: s (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [7, 1, 2] + keyColumns: 7:string, 1:smallint, 2:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [7] - valueColumnNums: [] + partitionColumns: 7:string Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs @@ -11452,11 +11448,10 @@ STAGE PLANS: Map-reduce partition columns: s (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [7, 1, 2] + keyColumns: 7:string, 1:smallint, 2:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [7] - valueColumnNums: [] + partitionColumns: 7:string Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs @@ -11710,11 +11705,10 @@ STAGE PLANS: Map-reduce partition columns: si (type: smallint), bo (type: boolean) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [1, 6, 2, 4] + keyColumns: 1:smallint, 6:boolean, 2:int, 4:float native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [1, 6] - valueColumnNums: [] + partitionColumns: 1:smallint, 6:boolean Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs @@ -11968,11 +11962,10 @@ STAGE PLANS: Map-reduce partition columns: i (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 6, 3] + keyColumns: 2:int, 6:boolean, 3:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [] + partitionColumns: 2:int Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs @@ -12192,12 +12185,12 @@ STAGE PLANS: Map-reduce partition columns: i (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 12] + keyColumns: 2:int, 12:char(12) keyExpressions: CastStringGroupToChar(col 7:string, maxLength 12) -> 12:char(12) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [7] + partitionColumns: 2:int + valueColumns: 7:string Statistics: Num 
rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE value expressions: s (type: string) Execution mode: vectorized, llap @@ -12418,12 +12411,12 @@ STAGE PLANS: Map-reduce partition columns: i (type: int) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 12] + keyColumns: 2:int, 12:varchar(12) keyExpressions: CastStringGroupToVarChar(col 7:string, maxLength 12) -> 12:varchar(12) native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [7] + partitionColumns: 2:int + valueColumns: 7:string Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE value expressions: s (type: string) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vector_windowing_rank.q.out ql/src/test/results/clientpositive/llap/vector_windowing_rank.q.out index 406bd93..97b6b62 100644 --- ql/src/test/results/clientpositive/llap/vector_windowing_rank.q.out +++ ql/src/test/results/clientpositive/llap/vector_windowing_rank.q.out @@ -81,11 +81,11 @@ STAGE PLANS: Map-reduce partition columns: f (type: float) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [4, 0] + keyColumns: 4:float, 0:tinyint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [4] - valueColumnNums: [7] + partitionColumns: 4:float + valueColumns: 7:string Statistics: Num rows: 1 Data size: 192 Basic stats: COMPLETE Column stats: NONE value expressions: s (type: string) Execution mode: vectorized, llap @@ -341,11 +341,10 @@ STAGE PLANS: Map-reduce partition columns: ts (type: timestamp) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [8, 2, 7] + keyColumns: 8:timestamp, 2:int, 7:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [8] - valueColumnNums: [] + partitionColumns: 8:timestamp Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs @@ -565,11 +564,10 @@ STAGE PLANS: Map-reduce partition columns: bo (type: boolean) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [6, 3, 7] + keyColumns: 6:boolean, 3:bigint, 7:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [6] - valueColumnNums: [] + partitionColumns: 6:boolean Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs @@ -789,11 +787,11 @@ STAGE PLANS: Map-reduce partition columns: dec (type: decimal(4,2)) Reduce Sink Vectorization: className: 
VectorReduceSinkObjectHashOperator - keyColumnNums: [9, 4] + keyColumns: 9:decimal(4,2), 4:float native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [9] - valueColumnNums: [7] + partitionColumns: 9:decimal(4,2) + valueColumns: 7:string Statistics: Num rows: 1 Data size: 300 Basic stats: COMPLETE Column stats: NONE value expressions: s (type: string) Execution mode: vectorized, llap @@ -1050,10 +1048,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [3] + keyColumns: 3:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [8, 9] + valueColumns: 8:timestamp, 9:decimal(4,2) Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: timestamp), _col2 (type: decimal(4,2)) Execution mode: vectorized, llap @@ -1103,10 +1101,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [3] + keyColumns: 3:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs @@ -1142,6 +1139,9 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col1 (type: timestamp) Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -1351,10 +1351,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [3] + keyColumns: 3:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [8, 9] + valueColumns: 8:timestamp, 9:decimal(4,2) Statistics: Num rows: 1 Data size: 160 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: timestamp), _col2 (type: decimal(4,2)) Execution mode: vectorized, llap @@ -1404,10 +1404,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [3] + keyColumns: 3:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data 
size: 8 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs @@ -1444,6 +1443,9 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: timestamp) Statistics: Num rows: 1 Data size: 176 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: decimal(4,2)) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -1654,10 +1656,10 @@ STAGE PLANS: Map-reduce partition columns: _col1 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [3] + keyColumns: 3:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [8, 9] + valueColumns: 8:timestamp, 9:decimal(4,2) Statistics: Num rows: 1 Data size: 164 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: timestamp), _col3 (type: decimal(4,2)) Execution mode: vectorized, llap @@ -1707,10 +1709,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: bigint) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [3] + keyColumns: 3:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized, llap LLAP IO: no inputs @@ -1746,6 +1747,9 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col2 (type: timestamp) Statistics: Num rows: 1 Data size: 180 Basic stats: COMPLETE Column stats: NONE + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: diff --git ql/src/test/results/clientpositive/llap/vector_windowing_streaming.q.out ql/src/test/results/clientpositive/llap/vector_windowing_streaming.q.out index ae07a78..45fd7cc 100644 --- ql/src/test/results/clientpositive/llap/vector_windowing_streaming.q.out +++ ql/src/test/results/clientpositive/llap/vector_windowing_streaming.q.out @@ -83,11 +83,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [] + partitionColumns: 2:string Statistics: Num rows: 26 Data size: 5694 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: no inputs diff --git ql/src/test/results/clientpositive/llap/vector_windowing_windowspec.q.out ql/src/test/results/clientpositive/llap/vector_windowing_windowspec.q.out index bff683f..bfa7485 100644 --- ql/src/test/results/clientpositive/llap/vector_windowing_windowspec.q.out +++ 
ql/src/test/results/clientpositive/llap/vector_windowing_windowspec.q.out
@@ -81,11 +81,10 @@ STAGE PLANS:
 Map-reduce partition columns: i (type: int)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [2, 7, 3]
+ keyColumns: 2:int, 7:string, 3:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [2]
- valueColumnNums: []
+ partitionColumns: 2:int
 Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -304,11 +303,10 @@ STAGE PLANS:
 Map-reduce partition columns: d (type: double)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [5, 7, 4]
+ keyColumns: 5:double, 7:string, 4:float
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [5]
- valueColumnNums: []
+ partitionColumns: 5:double
 Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -527,11 +525,11 @@ STAGE PLANS:
 Map-reduce partition columns: ts (type: timestamp)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [8, 4]
+ keyColumns: 8:timestamp, 4:float
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [8]
- valueColumnNums: [7]
+ partitionColumns: 8:timestamp
+ valueColumns: 7:string
 Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE
 value expressions: s (type: string)
 Execution mode: vectorized, llap
@@ -751,11 +749,10 @@ STAGE PLANS:
 Map-reduce partition columns: ts (type: timestamp)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [8, 7, 4]
+ keyColumns: 8:timestamp, 7:string, 4:float
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [8]
- valueColumnNums: []
+ partitionColumns: 8:timestamp
 Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -974,11 +971,10 @@ STAGE PLANS:
 Map-reduce partition columns: t (type: tinyint)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [0, 7, 5]
+ keyColumns: 0:tinyint, 7:string, 5:double
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [0]
- valueColumnNums: []
+ partitionColumns: 0:tinyint
 Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -1197,11 +1193,11 @@ STAGE PLANS:
 Map-reduce partition columns: ts (type: timestamp)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [8, 7]
+ keyColumns: 8:timestamp, 7:string
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [8]
- valueColumnNums: [2]
+ partitionColumns: 8:timestamp
+ valueColumns: 2:int
 Statistics: Num rows: 1 Data size: 228 Basic stats: COMPLETE Column stats: NONE
 value expressions: i (type: int)
 Execution mode: vectorized, llap
@@ -1456,11 +1452,10 @@ STAGE PLANS:
 Map-reduce partition columns: ts (type: timestamp)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [8, 4]
+ keyColumns: 8:timestamp, 4:float
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [8]
- valueColumnNums: []
+ partitionColumns: 8:timestamp
 Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -1714,11 +1709,10 @@ STAGE PLANS:
 Map-reduce partition columns: ts (type: timestamp)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [8, 4]
+ keyColumns: 8:timestamp, 4:float
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [8]
- valueColumnNums: []
+ partitionColumns: 8:timestamp
 Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE
 Execution mode: vectorized, llap
 LLAP IO: no inputs
@@ -1937,11 +1931,11 @@ STAGE PLANS:
 Map-reduce partition columns: s (type: string)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [7, 2]
+ keyColumns: 7:string, 2:int
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [7]
- valueColumnNums: [5]
+ partitionColumns: 7:string
+ valueColumns: 5:double
 Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
 value expressions: d (type: double)
 Execution mode: vectorized, llap
@@ -2104,11 +2098,11 @@ STAGE PLANS:
 Map-reduce partition columns: s (type: string)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [7, 2]
+ keyColumns: 7:string, 2:int
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [7]
- valueColumnNums: [5]
+ partitionColumns: 7:string
+ valueColumns: 5:double
 Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
 value expressions: d (type: double)
 Execution mode: vectorized, llap
@@ -2271,11 +2265,11 @@ STAGE PLANS:
 Map-reduce partition columns: s (type: string)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [7, 2]
+ keyColumns: 7:string, 2:int
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [7]
- valueColumnNums: [5]
+ partitionColumns: 7:string
+ valueColumns: 5:double
 Statistics: Num rows: 1 Data size: 196 Basic stats: COMPLETE Column stats: NONE
 value expressions: d (type: double)
 Execution mode: vectorized, llap
diff --git ql/src/test/results/clientpositive/llap/vector_windowing_windowspec4.q.out ql/src/test/results/clientpositive/llap/vector_windowing_windowspec4.q.out
index fe1e538..13087c4 100644
--- ql/src/test/results/clientpositive/llap/vector_windowing_windowspec4.q.out
+++ ql/src/test/results/clientpositive/llap/vector_windowing_windowspec4.q.out
@@ -80,11 +80,10 @@ STAGE PLANS:
 Map-reduce partition columns: type (type: string)
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [1, 0]
+ keyColumns: 1:string, 0:int
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- partitionColumnNums: [1]
- valueColumnNums: []
+ partitionColumns: 1:string
 Statistics: Num rows: 3 Data size: 267 Basic stats: COMPLETE Column stats: COMPLETE
 Execution mode: vectorized, llap
 LLAP IO: no inputs
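The recurring change in these reduce-sink hunks is purely presentational: the former keyColumnNums/partitionColumnNums/valueColumnNums integer arrays are folded into keyColumns/partitionColumns/valueColumns lists that carry each column's type next to its ordinal (for example, 2:int, 7:string, 3:bigint). A minimal Java sketch of that ordinal:type rendering — hypothetical names, shown only to illustrate the new format, not Hive's actual implementation:

    import java.util.Arrays;
    import java.util.List;
    import java.util.stream.Collectors;

    public final class ColumnListRenderer {
        // One projected column: its ordinal in the vectorized row batch plus its type name.
        static final class Column {
            final int num;
            final String typeName;
            Column(int num, String typeName) { this.num = num; this.typeName = typeName; }
        }

        // Joins columns as "num:type, num:type", e.g. "2:int, 7:string, 3:bigint".
        static String render(List<Column> columns) {
            return columns.stream()
                    .map(c -> c.num + ":" + c.typeName)
                    .collect(Collectors.joining(", "));
        }

        public static void main(String[] args) {
            List<Column> keys = Arrays.asList(
                    new Column(2, "int"), new Column(7, "string"), new Column(3, "bigint"));
            System.out.println("keyColumns: " + render(keys)); // keyColumns: 2:int, 7:string, 3:bigint
        }
    }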
diff --git ql/src/test/results/clientpositive/llap/vectorization_0.q.out ql/src/test/results/clientpositive/llap/vectorization_0.q.out
index 1a846ab..e4eb5b5 100644
--- ql/src/test/results/clientpositive/llap/vectorization_0.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_0.q.out
@@ -63,10 +63,9 @@ STAGE PLANS:
 sort order:
 Reduce Sink Vectorization:
 className: VectorReduceSinkEmptyKeyOperator
- keyColumnNums: []
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [0, 1, 2, 3]
+ valueColumns: 0:tinyint, 1:tinyint, 2:bigint, 3:bigint
 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: tinyint), _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint)
 Execution mode: vectorized, llap
@@ -119,10 +118,10 @@ STAGE PLANS:
 sort order: +
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [0]
+ keyColumns: 0:tinyint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [1, 2, 3]
+ valueColumns: 1:tinyint, 2:bigint, 3:bigint
 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint)
 Reducer 3
@@ -244,10 +243,9 @@ STAGE PLANS:
 sort order:
 Reduce Sink Vectorization:
 className: VectorReduceSinkEmptyKeyOperator
- keyColumnNums: []
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [0]
+ valueColumns: 0:bigint
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: bigint)
 Execution mode: vectorized, llap
@@ -300,10 +298,9 @@ STAGE PLANS:
 sort order: +
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [0]
+ keyColumns: 0:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: []
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 Reducer 3
 Execution mode: vectorized, llap
@@ -570,10 +567,9 @@ STAGE PLANS:
 sort order:
 Reduce Sink Vectorization:
 className: VectorReduceSinkEmptyKeyOperator
- keyColumnNums: []
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [0, 1, 2, 3]
+ valueColumns: 0:bigint, 1:bigint, 2:bigint, 3:bigint
 Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint)
 Execution mode: vectorized, llap
@@ -626,10 +622,10 @@ STAGE PLANS:
 sort order: +
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [0]
+ keyColumns: 0:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [1, 2, 3]
+ valueColumns: 1:bigint, 2:bigint, 3:bigint
 Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint)
 Reducer 3
@@ -751,10 +747,9 @@ STAGE PLANS:
 sort order:
 Reduce Sink Vectorization:
 className: VectorReduceSinkEmptyKeyOperator
- keyColumnNums: []
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [0]
+ valueColumns: 0:bigint
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: bigint)
 Execution mode: vectorized, llap
@@ -807,10 +802,9 @@ STAGE PLANS:
 sort order: +
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [0]
+ keyColumns: 0:bigint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: []
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 Reducer 3
 Execution mode: vectorized, llap
@@ -1077,10 +1071,9 @@ STAGE PLANS:
 sort order:
 Reduce Sink Vectorization:
 className: VectorReduceSinkEmptyKeyOperator
- keyColumnNums: []
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [0, 1, 2, 3]
+ valueColumns: 0:float, 1:float, 2:bigint, 3:bigint
 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: float), _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint)
 Execution mode: vectorized, llap
@@ -1133,10 +1126,10 @@ STAGE PLANS:
 sort order: +
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [0]
+ keyColumns: 0:float
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [1, 2, 3]
+ valueColumns: 1:float, 2:bigint, 3:bigint
 Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint)
 Reducer 3
@@ -1258,10 +1251,9 @@ STAGE PLANS:
 sort order:
 Reduce Sink Vectorization:
 className: VectorReduceSinkEmptyKeyOperator
- keyColumnNums: []
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [0]
+ valueColumns: 0:double
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: double)
 Execution mode: vectorized, llap
@@ -1314,10 +1306,9 @@ STAGE PLANS:
 sort order: +
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [0]
+ keyColumns: 0:double
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: []
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 Reducer 3
 Execution mode: vectorized, llap
@@ -1629,10 +1620,9 @@ STAGE PLANS:
 sort order:
 Reduce Sink Vectorization:
 className: VectorReduceSinkEmptyKeyOperator
- keyColumnNums: []
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [0, 1, 2, 3, 4, 5]
+ valueColumns: 0:struct, 1:struct, 2:struct, 3:bigint, 4:double, 5:tinyint
 Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: bigint), _col4 (type: double), _col5 (type: tinyint)
 Execution mode: vectorized, llap
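A second detail visible in the vectorization_0.q.out hunks above: where the old format printed empty arrays (keyColumnNums: [] under VectorReduceSinkEmptyKeyOperator, valueColumnNums: [] when no values are forwarded), the new format omits the attribute line entirely. A guard of roughly this shape reproduces that omission rule (hypothetical helper, reusing the Column/render sketch shown earlier):

    // Append the attribute only when there is something to show; empty key or
    // value lists no longer produce a "...: []" line in the rendered plan.
    static void appendColumns(StringBuilder out, String label,
                              java.util.List<ColumnListRenderer.Column> cols) {
        if (cols == null || cols.isEmpty()) {
            return; // e.g. no keyColumns line under VectorReduceSinkEmptyKeyOperator
        }
        out.append(label).append(": ").append(ColumnListRenderer.render(cols)).append('\n');
    }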
diff --git ql/src/test/results/clientpositive/llap/vectorization_1.q.out ql/src/test/results/clientpositive/llap/vectorization_1.q.out
index dbee077..e71cfa9 100644
--- ql/src/test/results/clientpositive/llap/vectorization_1.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_1.q.out
@@ -95,10 +95,9 @@ STAGE PLANS:
 sort order:
 Reduce Sink Vectorization:
 className: VectorReduceSinkEmptyKeyOperator
- keyColumnNums: []
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [0, 1, 2, 3, 4, 5]
+ valueColumns: 0:struct, 1:double, 2:tinyint, 3:int, 4:struct, 5:bigint
 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: struct), _col1 (type: double), _col2 (type: tinyint), _col3 (type: int), _col4 (type: struct), _col5 (type: bigint)
 Execution mode: vectorized, llap
diff --git ql/src/test/results/clientpositive/llap/vectorization_12.q.out ql/src/test/results/clientpositive/llap/vectorization_12.q.out
index c9faf55..5ea5fac 100644
--- ql/src/test/results/clientpositive/llap/vectorization_12.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_12.q.out
@@ -122,10 +122,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: double), _col1 (type: bigint), _col2 (type: string), _col3 (type: boolean)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
- keyColumnNums: [0, 1, 2, 3]
+ keyColumns: 0:double, 1:bigint, 2:string, 3:boolean
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [4, 5, 6, 7, 8]
+ valueColumns: 4:bigint, 5:struct, 6:struct, 7:bigint, 8:struct
 Statistics: Num rows: 1 Data size: 370 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col4 (type: bigint), _col5 (type: struct), _col6 (type: struct), _col7 (type: bigint), _col8 (type: struct)
 Execution mode: vectorized, llap
diff --git ql/src/test/results/clientpositive/llap/vectorization_13.q.out ql/src/test/results/clientpositive/llap/vectorization_13.q.out
index 82982e6..74e5671 100644
--- ql/src/test/results/clientpositive/llap/vectorization_13.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_13.q.out
@@ -124,10 +124,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
- keyColumnNums: [0, 1, 2, 3, 4]
+ keyColumns: 0:boolean, 1:tinyint, 2:timestamp, 3:float, 4:string
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [5, 6, 7, 8, 9, 10]
+ valueColumns: 5:tinyint, 6:double, 7:struct, 8:struct, 9:float, 10:tinyint
 Statistics: Num rows: 2730 Data size: 816734 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: struct), _col8 (type: struct), _col9 (type: float), _col10 (type: tinyint)
 Execution mode: vectorized, llap
diff --git ql/src/test/results/clientpositive/llap/vectorization_14.q.out ql/src/test/results/clientpositive/llap/vectorization_14.q.out
index eaf5157..8a59d24 100644
--- ql/src/test/results/clientpositive/llap/vectorization_14.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_14.q.out
@@ -125,10 +125,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
- keyColumnNums: [0, 1, 2, 3, 4]
+ keyColumns: 0:string, 1:float, 2:double, 3:timestamp, 4:boolean
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [5, 6, 7, 8, 9, 10]
+ valueColumns: 5:struct, 6:float, 7:struct, 8:bigint, 9:struct, 10:struct
 Statistics: Num rows: 303 Data size: 137686 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col5 (type: struct), _col6 (type: float), _col7 (type: struct), _col8 (type: bigint), _col9 (type: struct), _col10 (type: struct)
 Execution mode: vectorized, llap
diff --git ql/src/test/results/clientpositive/llap/vectorization_15.q.out ql/src/test/results/clientpositive/llap/vectorization_15.q.out
index 0887212..1e9d800 100644
--- ql/src/test/results/clientpositive/llap/vectorization_15.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_15.q.out
@@ -120,10 +120,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
- keyColumnNums: [0, 1, 2, 3, 4, 5, 6]
+ keyColumns: 0:float, 1:boolean, 2:double, 3:string, 4:tinyint, 5:int, 6:timestamp
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [7, 8, 9, 10, 11, 12]
+ valueColumns: 7:struct, 8:double, 9:struct, 10:struct, 11:struct, 12:struct
 Statistics: Num rows: 6144 Data size: 3293884 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col7 (type: struct), _col8 (type: double), _col9 (type: struct), _col10 (type: struct), _col11 (type: struct), _col12 (type: struct)
 Execution mode: vectorized, llap
diff --git ql/src/test/results/clientpositive/llap/vectorization_16.q.out ql/src/test/results/clientpositive/llap/vectorization_16.q.out
index dd2e5f0..43d3a82 100644
--- ql/src/test/results/clientpositive/llap/vectorization_16.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_16.q.out
@@ -97,10 +97,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
- keyColumnNums: [0, 1, 2]
+ keyColumns: 0:double, 1:string, 2:timestamp
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [3, 4, 5]
+ valueColumns: 3:bigint, 4:struct, 5:double
 Statistics: Num rows: 2048 Data size: 434588 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: double)
 Execution mode: vectorized, llap
diff --git ql/src/test/results/clientpositive/llap/vectorization_17.q.out ql/src/test/results/clientpositive/llap/vectorization_17.q.out
index d0b2f7a..ab50738 100644
--- ql/src/test/results/clientpositive/llap/vectorization_17.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_17.q.out
@@ -91,10 +91,10 @@ STAGE PLANS:
 sort order: ++
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [3, 4]
+ keyColumns: 3:bigint, 4:float
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [6, 2, 8, 5, 15, 16, 14, 17, 19, 20, 22, 18]
+ valueColumns: 6:string, 2:int, 8:timestamp, 5:double, 15:double, 16:bigint, 14:double, 17:double, 19:double, 20:double, 22:decimal(11,4), 18:double
 Statistics: Num rows: 4096 Data size: 1212930 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: timestamp), _col4 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: decimal(11,4)), _col13 (type: double)
 Execution mode: vectorized, llap
diff --git ql/src/test/results/clientpositive/llap/vectorization_2.q.out ql/src/test/results/clientpositive/llap/vectorization_2.q.out
index 96badf9..62cdc6e 100644
--- ql/src/test/results/clientpositive/llap/vectorization_2.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_2.q.out
@@ -99,10 +99,9 @@ STAGE PLANS:
 sort order:
 Reduce Sink Vectorization:
 className: VectorReduceSinkEmptyKeyOperator
- keyColumnNums: []
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [0, 1, 2, 3, 4, 5]
+ valueColumns: 0:struct, 1:double, 2:struct, 3:bigint, 4:tinyint, 5:struct
 Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: struct), _col1 (type: double), _col2 (type: struct), _col3 (type: bigint), _col4 (type: tinyint), _col5 (type: struct)
 Execution mode: vectorized, llap
diff --git ql/src/test/results/clientpositive/llap/vectorization_3.q.out ql/src/test/results/clientpositive/llap/vectorization_3.q.out
index b472c2d..405bed7 100644
--- ql/src/test/results/clientpositive/llap/vectorization_3.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_3.q.out
@@ -104,10 +104,9 @@ STAGE PLANS:
 sort order:
 Reduce Sink Vectorization:
 className: VectorReduceSinkEmptyKeyOperator
- keyColumnNums: []
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [0, 1, 2, 3, 4, 5]
+ valueColumns: 0:struct, 1:struct, 2:struct, 3:double, 4:struct, 5:struct
 Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: double), _col4 (type: struct), _col5 (type: struct)
 Execution mode: vectorized, llap
diff --git ql/src/test/results/clientpositive/llap/vectorization_4.q.out ql/src/test/results/clientpositive/llap/vectorization_4.q.out
index 122f3fb..efbbf61 100644
--- ql/src/test/results/clientpositive/llap/vectorization_4.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_4.q.out
@@ -99,10 +99,9 @@ STAGE PLANS:
 sort order:
 Reduce Sink Vectorization:
 className: VectorReduceSinkEmptyKeyOperator
- keyColumnNums: []
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [0, 1, 2, 3, 4]
+ valueColumns: 0:bigint, 1:struct, 2:struct, 3:struct, 4:tinyint
 Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: bigint), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: tinyint)
 Execution mode: vectorized, llap
diff --git ql/src/test/results/clientpositive/llap/vectorization_5.q.out ql/src/test/results/clientpositive/llap/vectorization_5.q.out
index 5124740..be9c39f 100644
--- ql/src/test/results/clientpositive/llap/vectorization_5.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_5.q.out
@@ -93,10 +93,9 @@ STAGE PLANS:
 sort order:
 Reduce Sink Vectorization:
 className: VectorReduceSinkEmptyKeyOperator
- keyColumnNums: []
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [0, 1, 2, 3, 4]
+ valueColumns: 0:smallint, 1:bigint, 2:smallint, 3:bigint, 4:tinyint
 Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: tinyint)
 Execution mode: vectorized, llap
diff --git ql/src/test/results/clientpositive/llap/vectorization_7.q.out ql/src/test/results/clientpositive/llap/vectorization_7.q.out
index 907411b..3554928 100644
--- ql/src/test/results/clientpositive/llap/vectorization_7.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_7.q.out
@@ -97,10 +97,9 @@ STAGE PLANS:
 sort order: +++++++++++++++
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [10, 3, 1, 0, 8, 6, 14, 15, 16, 17, 19, 20, 18, 21, 23]
+ keyColumns: 10:boolean, 3:bigint, 1:smallint, 0:tinyint, 8:timestamp, 6:string, 14:bigint, 15:int, 16:smallint, 17:tinyint, 19:int, 20:bigint, 18:int, 21:tinyint, 23:tinyint
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: []
 Statistics: Num rows: 5461 Data size: 923616 Basic stats: COMPLETE Column stats: COMPLETE
 TopN Hash Memory Usage: 0.1
 Execution mode: vectorized, llap
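In the vectorization_7 hunk above the sink carries keys only (the empty valueColumnNums: [] line disappears with no replacement), and the sort order string appears to hold one character per key column — fifteen '+' signs for the fifteen keyColumns — with '+' meaning ascending and '-' descending. A small decoder under that one-character-per-key assumption:

    // Decodes an EXPLAIN "sort order:" string such as "++-" into per-key ascending flags.
    static boolean[] decodeSortOrder(String sortOrder) {
        boolean[] ascending = new boolean[sortOrder.length()];
        for (int i = 0; i < sortOrder.length(); i++) {
            ascending[i] = sortOrder.charAt(i) == '+';
        }
        return ascending;
    }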
diff --git ql/src/test/results/clientpositive/llap/vectorization_8.q.out ql/src/test/results/clientpositive/llap/vectorization_8.q.out
index 64480d7..f40c2ec 100644
--- ql/src/test/results/clientpositive/llap/vectorization_8.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_8.q.out
@@ -93,10 +93,9 @@ STAGE PLANS:
 sort order: ++++++++++++++
 Reduce Sink Vectorization:
 className: VectorReduceSinkObjectHashOperator
- keyColumnNums: [8, 5, 10, 6, 4, 13, 14, 15, 17, 19, 16, 18, 20, 22]
+ keyColumns: 8:timestamp, 5:double, 10:boolean, 6:string, 4:float, 13:double, 14:double, 15:double, 17:float, 19:double, 16:double, 18:float, 20:float, 22:double
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: []
 Statistics: Num rows: 3059 Data size: 557250 Basic stats: COMPLETE Column stats: COMPLETE
 TopN Hash Memory Usage: 0.1
 Execution mode: vectorized, llap
diff --git ql/src/test/results/clientpositive/llap/vectorization_9.q.out ql/src/test/results/clientpositive/llap/vectorization_9.q.out
index dd2e5f0..43d3a82 100644
--- ql/src/test/results/clientpositive/llap/vectorization_9.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_9.q.out
@@ -97,10 +97,10 @@ STAGE PLANS:
 Map-reduce partition columns: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp)
 Reduce Sink Vectorization:
 className: VectorReduceSinkMultiKeyOperator
- keyColumnNums: [0, 1, 2]
+ keyColumns: 0:double, 1:string, 2:timestamp
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [3, 4, 5]
+ valueColumns: 3:bigint, 4:struct, 5:double
 Statistics: Num rows: 2048 Data size: 434588 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: double)
 Execution mode: vectorized, llap
diff --git ql/src/test/results/clientpositive/llap/vectorization_nested_udf.q.out ql/src/test/results/clientpositive/llap/vectorization_nested_udf.q.out
index e6427fa..0b12efa 100644
--- ql/src/test/results/clientpositive/llap/vectorization_nested_udf.q.out
+++ ql/src/test/results/clientpositive/llap/vectorization_nested_udf.q.out
@@ -53,10 +53,9 @@ STAGE PLANS:
 sort order:
 Reduce Sink Vectorization:
 className: VectorReduceSinkEmptyKeyOperator
- keyColumnNums: []
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [0]
+ valueColumns: 0:bigint
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: bigint)
 Execution mode: vectorized, llap
diff --git ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out
index 61c5051..164edc4 100644
--- ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out
+++ ql/src/test/results/clientpositive/llap/vectorized_bucketmapjoin1.q.out
@@ -208,6 +208,9 @@ STAGE PLANS:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ MergeJoin Vectorization:
+ enabled: false
+ enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
 Stage: Stage-0
 Fetch Operator
@@ -343,6 +346,9 @@ STAGE PLANS:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ MergeJoin Vectorization:
+ enabled: false
+ enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
 Stage: Stage-0
 Fetch Operator
@@ -479,6 +485,9 @@ STAGE PLANS:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ MergeJoin Vectorization:
+ enabled: false
+ enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
 Stage: Stage-0
 Fetch Operator
diff --git ql/src/test/results/clientpositive/llap/vectorized_case.q.out ql/src/test/results/clientpositive/llap/vectorized_case.q.out
index aec161d..484be90 100644
--- ql/src/test/results/clientpositive/llap/vectorized_case.q.out
+++ ql/src/test/results/clientpositive/llap/vectorized_case.q.out
@@ -315,10 +315,9 @@ STAGE PLANS:
 sort order:
 Reduce Sink Vectorization:
 className: VectorReduceSinkEmptyKeyOperator
- keyColumnNums: []
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [0, 1]
+ valueColumns: 0:bigint, 1:bigint
 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: bigint), _col1 (type: bigint)
 Execution mode: vectorized, llap
@@ -459,10 +458,9 @@ STAGE PLANS:
 sort order:
 Reduce Sink Vectorization:
 className: VectorReduceSinkEmptyKeyOperator
- keyColumnNums: []
 native: true
 nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- valueColumnNums: [0, 1]
+ valueColumns: 0:bigint, 1:bigint
 Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: bigint), _col1 (type: bigint)
 Execution mode: vectorized, llap
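Every hunk in the two files that follow adds the same three-line annotation under a reducer hosting a non-vectorizable merge join: MergeJoin Vectorization: with enabled: false and the single failed condition Vectorizing MergeJoin Supported IS false. The shape mirrors the other operator annotations in these plans — an enabled flag plus a list of met or unmet conditions — which a toy model can capture (illustrative only; these are not Hive's actual classes):

    import java.util.List;

    final class VectorizationNote {
        final String title;            // e.g. "MergeJoin Vectorization"
        final boolean enabled;
        final List<String> conditions; // e.g. "Vectorizing MergeJoin Supported IS false"

        VectorizationNote(String title, boolean enabled, List<String> conditions) {
            this.title = title;
            this.enabled = enabled;
            this.conditions = conditions;
        }

        // Renders the three-line block seen in the plan diffs below.
        String render() {
            return title + ":\n"
                    + "  enabled: " + enabled + "\n"
                    + (enabled ? "  enableConditionsMet: " : "  enableConditionsNotMet: ")
                    + String.join(", ", conditions) + "\n";
        }
    }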
diff --git ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
index 15b62c9..d293e05 100644
--- ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
+++ ql/src/test/results/clientpositive/llap/vectorized_dynamic_partition_pruning.q.out
@@ -330,6 +330,9 @@ STAGE PLANS:
 sort order:
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col0 (type: bigint)
+ MergeJoin Vectorization:
+ enabled: false
+ enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
 Reducer 3
 Execution mode: vectorized, llap
 Reduce Vectorization:
@@ -473,6 +476,9 @@ STAGE PLANS:
 sort order:
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col0 (type: bigint)
+ MergeJoin Vectorization:
+ enabled: false
+ enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
 Reducer 3
 Execution mode: vectorized, llap
 Reduce Vectorization:
@@ -688,6 +694,9 @@ STAGE PLANS:
 sort order: +
 Map-reduce partition columns: _col1 (type: string)
 Statistics: Num rows: 2200 Data size: 809600 Basic stats: COMPLETE Column stats: NONE
+ MergeJoin Vectorization:
+ enabled: false
+ enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
 Reducer 3
 Execution mode: llap
 Reduce Operator Tree:
@@ -707,6 +716,9 @@ STAGE PLANS:
 sort order:
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col0 (type: bigint)
+ MergeJoin Vectorization:
+ enabled: false
+ enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
 Reducer 4
 Execution mode: vectorized, llap
 Reduce Vectorization:
@@ -884,6 +896,9 @@ STAGE PLANS:
 sort order: +
 Map-reduce partition columns: _col1 (type: string)
 Statistics: Num rows: 2200 Data size: 809600 Basic stats: COMPLETE Column stats: NONE
+ MergeJoin Vectorization:
+ enabled: false
+ enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
 Reducer 3
 Execution mode: llap
 Reduce Operator Tree:
@@ -903,6 +918,9 @@ STAGE PLANS:
 sort order:
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col0 (type: bigint)
+ MergeJoin Vectorization:
+ enabled: false
+ enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
 Reducer 4
 Execution mode: vectorized, llap
 Reduce Vectorization:
@@ -1090,6 +1108,9 @@ STAGE PLANS:
 sort order:
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col0 (type: bigint)
+ MergeJoin Vectorization:
+ enabled: false
+ enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
 Reducer 3
 Execution mode: vectorized, llap
 Reduce Vectorization:
@@ -1233,6 +1254,9 @@ STAGE PLANS:
 sort order:
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col0 (type: bigint)
+ MergeJoin Vectorization:
+ enabled: false
+ enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
 Reducer 3
 Execution mode: vectorized, llap
 Reduce Vectorization:
@@ -1402,6 +1426,9 @@ STAGE PLANS:
 sort order:
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col0 (type: bigint)
+ MergeJoin Vectorization:
+ enabled: false
+ enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
 Reducer 3
 Execution mode: vectorized, llap
 Reduce Vectorization:
@@ -1545,6 +1572,9 @@ STAGE PLANS:
 sort order:
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col0 (type: bigint)
+ MergeJoin Vectorization:
+ enabled: false
+ enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
 Reducer 3
 Execution mode: vectorized, llap
 Reduce Vectorization:
@@ -1712,6 +1742,9 @@ STAGE PLANS:
 sort order:
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col0 (type: bigint)
+ MergeJoin Vectorization:
+ enabled: false
+ enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
 Reducer 3
 Execution mode: vectorized, llap
 Reduce Vectorization:
@@ -1870,6 +1903,9 @@ STAGE PLANS:
 sort order:
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col0 (type: bigint)
+ MergeJoin Vectorization:
+ enabled: false
+ enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
 Reducer 3
 Execution mode: vectorized, llap
 Reduce Vectorization:
@@ -2013,6 +2049,9 @@ STAGE PLANS:
 sort order:
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col0 (type: bigint)
+ MergeJoin Vectorization:
+ enabled: false
+ enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
 Reducer 3
 Execution mode: vectorized, llap
 Reduce Vectorization:
@@ -2156,6 +2195,9 @@ STAGE PLANS:
 sort order:
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col0 (type: bigint)
+ MergeJoin Vectorization:
+ enabled: false
+ enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
 Reducer 3
 Execution mode: vectorized, llap
 Reduce Vectorization:
@@ -2327,6 +2369,9 @@ STAGE PLANS:
 sort order:
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col0 (type: bigint)
+ MergeJoin Vectorization:
+ enabled: false
+ enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
 Reducer 3
 Execution mode: vectorized, llap
 Reduce Vectorization:
@@ -2462,6 +2507,9 @@ STAGE PLANS:
 sort order:
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: bigint)
+ MergeJoin Vectorization:
+ enabled: false
+ enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
 Reducer 3
 Execution mode: vectorized, llap
 Reduce Vectorization:
@@ -2634,6 +2682,9 @@ STAGE PLANS:
 sort order:
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col0 (type: bigint)
+ MergeJoin Vectorization:
+ enabled: false
+ enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
 Reducer 3
 Execution mode: vectorized, llap
 Reduce Vectorization:
@@ -2807,6 +2858,9 @@ STAGE PLANS:
 sort order:
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col0 (type: bigint)
+ MergeJoin Vectorization:
+ enabled: false
+ enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
 Reducer 3
 Execution mode: vectorized, llap
 Reduce Vectorization:
@@ -2965,6 +3019,9 @@ STAGE PLANS:
 sort order:
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col0 (type: bigint)
+ MergeJoin Vectorization:
+ enabled: false
+ enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
 Reducer 3
 Execution mode: vectorized, llap
 Reduce Vectorization:
@@ -3103,6 +3160,9 @@ STAGE PLANS:
 sort order:
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col0 (type: bigint)
+ MergeJoin Vectorization:
+ enabled: false
+ enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
 Reducer 3
 Execution mode: vectorized, llap
 Reduce Vectorization:
@@ -3241,6 +3301,9 @@ STAGE PLANS:
 sort order:
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col0 (type: bigint)
+ MergeJoin Vectorization:
+ enabled: false
+ enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
 Reducer 3
 Execution mode: vectorized, llap
 Reduce Vectorization:
@@ -3424,6 +3487,9 @@ STAGE PLANS:
 sort order: +
 Map-reduce partition columns: _col1 (type: string)
 Statistics: Num rows: 1100 Data size: 404800 Basic stats: COMPLETE Column stats: NONE
+ MergeJoin Vectorization:
+ enabled: false
+ enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
 Reducer 3
 Execution mode: llap
 Reduce Operator Tree:
@@ -3443,6 +3509,9 @@ STAGE PLANS:
 sort order:
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col0 (type: bigint)
+ MergeJoin Vectorization:
+ enabled: false
+ enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
 Reducer 4
 Execution mode: vectorized, llap
 Reduce Vectorization:
@@ -3616,6 +3685,9 @@ STAGE PLANS:
 sort order: +
 Map-reduce partition columns: _col1 (type: string)
 Statistics: Num rows: 1 Data size: 202 Basic stats: COMPLETE Column stats: NONE
+ MergeJoin Vectorization:
+ enabled: false
+ enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
 Reducer 3
 Execution mode: llap
 Reduce Operator Tree:
@@ -3635,6 +3707,9 @@ STAGE PLANS:
 sort order:
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col0 (type: bigint)
+ MergeJoin Vectorization:
+ enabled: false
+ enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
 Reducer 4
 Execution mode: vectorized, llap
 Reduce Vectorization:
@@ -3807,6 +3882,9 @@ STAGE PLANS:
 sort order:
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: bigint)
+ MergeJoin Vectorization:
+ enabled: false
+ enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
 Reducer 3
 Execution mode: vectorized, llap
 Reduce Vectorization:
@@ -4069,6 +4147,9 @@ STAGE PLANS:
 sort order: +
 Map-reduce partition columns: _col0 (type: string)
 Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: COMPLETE
+ MergeJoin Vectorization:
+ enabled: false
+ enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
 Reducer 3
 Execution mode: vectorized, llap
 Reduce Vectorization:
@@ -4435,6 +4516,9 @@ STAGE PLANS:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ MergeJoin Vectorization:
+ enabled: false
+ enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
 Reducer 6
 Execution mode: vectorized, llap
 Reduce Vectorization:
@@ -5650,6 +5734,9 @@ STAGE PLANS:
 sort order:
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col0 (type: bigint)
+ MergeJoin Vectorization:
+ enabled: false
+ enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
 Reducer 3
 Execution mode: vectorized, llap
 Reduce Vectorization:
diff --git ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out
index 228bd9d..d0fecff 100644
--- ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out
+++ ql/src/test/results/clientpositive/llap/vectorized_dynamic_semijoin_reduction.q.out
@@ -180,6 +180,9 @@ STAGE PLANS:
 sort order:
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col0 (type: bigint)
+ MergeJoin Vectorization:
+ enabled: false
+ enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
 Reducer 3
 Execution mode: vectorized, llap
 Reduce Vectorization:
@@ -414,6 +417,9 @@ STAGE PLANS:
 sort order:
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col0 (type: bigint)
+ MergeJoin Vectorization:
+ enabled: false
+ enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
 Reducer 3
 Execution mode: vectorized, llap
 Reduce Vectorization:
@@ -648,6 +654,9 @@ STAGE PLANS:
 sort order:
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col0 (type: bigint)
+ MergeJoin Vectorization:
+ enabled: false
+ enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
 Reducer 3
 Execution mode: vectorized, llap
 Reduce Vectorization:
@@ -956,6 +965,9 @@ STAGE PLANS:
 sort order:
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col0 (type: bigint)
+ MergeJoin Vectorization:
+ enabled: false
+ enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
 Reducer 3
 Execution mode: vectorized, llap
 Reduce Vectorization:
@@ -1248,6 +1260,9 @@ STAGE PLANS:
 sort order:
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col0 (type: bigint)
+ MergeJoin Vectorization:
+ enabled: false
+ enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
 Reducer 3
 Execution mode: vectorized, llap
 Reduce Vectorization:
@@ -1511,6 +1526,9 @@ STAGE PLANS:
 sort order:
 Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
 value expressions: _col0 (type: bigint)
+ MergeJoin Vectorization:
+ enabled: false
+ enableConditionsNotMet: Vectorizing MergeJoin Supported IS false
 Reducer 3
 Execution mode: vectorized, llap
 Reduce Vectorization:
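The vectorized_join46.q.out changes below come from switching the test from plain EXPLAIN to EXPLAIN VECTORIZATION DETAIL, which adds the Explain column header, the PLAN VECTORIZATION summary, per-operator *Vectorization blocks, and the Map Vectorization / rowBatchContext details. The same statement can be issued against any HiveServer2 over standard JDBC (the Hive JDBC driver must be on the classpath); the URL and credentials here are placeholders:

    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.ResultSet;
    import java.sql.Statement;

    public class ExplainVectorizationDemo {
        public static void main(String[] args) throws Exception {
            // Placeholder connection settings; point at a real HiveServer2 instance.
            try (Connection conn = DriverManager.getConnection(
                         "jdbc:hive2://localhost:10000/default", "hive", "");
                 Statement stmt = conn.createStatement();
                 ResultSet rs = stmt.executeQuery(
                         "EXPLAIN VECTORIZATION DETAIL "
                       + "SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value = test2.value)")) {
                while (rs.next()) {
                    System.out.println(rs.getString(1)); // one plan line per result row
                }
            }
        }
    }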
diff --git ql/src/test/results/clientpositive/llap/vectorized_join46.q.out ql/src/test/results/clientpositive/llap/vectorized_join46.q.out
index a134b19..0a8f4de 100644
--- ql/src/test/results/clientpositive/llap/vectorized_join46.q.out
+++ ql/src/test/results/clientpositive/llap/vectorized_join46.q.out
@@ -19,6 +19,7 @@ POSTHOOK: Output: default@test1
 POSTHOOK: Lineage: test1.col_1 SCRIPT []
 POSTHOOK: Lineage: test1.key SCRIPT []
 POSTHOOK: Lineage: test1.value SCRIPT []
+col1 col2 col3
 PREHOOK: query: CREATE TABLE test2 (key INT, value INT, col_2 STRING)
 PREHOOK: type: CREATETABLE
 PREHOOK: Output: database:default
@@ -40,16 +41,22 @@ POSTHOOK: Output: default@test2
 POSTHOOK: Lineage: test2.col_2 SCRIPT []
 POSTHOOK: Lineage: test2.key SCRIPT []
 POSTHOOK: Lineage: test2.value SCRIPT []
-PREHOOK: query: EXPLAIN
+col1 col2 col3
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value)
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value)
 POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
 Stage-1 is a root stage
 Stage-0 depends on stages: Stage-1
@@ -67,9 +74,16 @@ STAGE PLANS:
 TableScan
 alias: test1
 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct]
 Select Operator
 expressions: key (type: int), value (type: int), col_1 (type: string)
 outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2]
 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
 Map Join Operator
 condition map:
@@ -77,12 +91,26 @@ STAGE PLANS:
 keys:
 0 _col1 (type: int)
 1 _col1 (type: int)
+ Map Join Vectorization:
+ bigTableKeyColumns: 1:int
+ bigTableRetainColumnNums: [0, 1, 2]
+ bigTableValueColumns: 0:int, 1:int, 2:string
+ className: VectorMapJoinOuterLongOperator
+ native: true
+ nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true
+ outerSmallTableKeyMapping: 1 -> 5
+ projectedOutput: 0:int, 1:int, 2:string, 4:int, 5:int, 6:string
+ smallTableValueMapping: 4:int, 6:string
+ hashTableImplementationType: OPTIMIZED
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
 input vertices:
 1 Map 2
 Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
 Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -90,23 +118,68 @@ STAGE PLANS:
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 Execution mode: vectorized, llap
 LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled]
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 3
+ includeColumns: [0, 1, 2]
+ dataColumns: key:int, value:int, col_1:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, bigint, string]
 Map 2
 Map Operator Tree:
 TableScan
 alias: test2
 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct]
 Select Operator
 expressions: key (type: int), value (type: int), col_2 (type: string)
 outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2]
 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col1 (type: int)
 sort order: +
 Map-reduce partition columns: _col1 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ keyColumns: 1:int
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumns: 0:int, 2:string
 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: int), _col2 (type: string)
 Execution mode: vectorized, llap
 LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled]
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 3
+ includeColumns: [0, 1, 2]
+ dataColumns: key:int, value:int, col_2:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
 Stage: Stage-0
 Fetch Operator
@@ -128,6 +201,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@test1
 POSTHOOK: Input: default@test2
 #### A masked pattern was here ####
+test1.key test1.value test1.col_1 test2.key test2.value test2.col_2
 100 1 Bob NULL NULL NULL
 101 2 Car 102 2 Del
 101 2 Car 103 2 Ema
@@ -136,20 +210,25 @@ POSTHOOK: Input: default@test2
 99 2 Mat 102 2 Del
 99 2 Mat 103 2 Ema
 NULL NULL None NULL NULL NULL
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value AND test1.key between 100 and 102 AND test2.key between 100 and 102)
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value AND test1.key between 100 and 102 AND test2.key between 100 and 102)
 POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
 Stage-1 is a root stage
 Stage-0 depends on stages: Stage-1
@@ -167,9 +246,16 @@ STAGE PLANS:
 TableScan
 alias: test1
 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct]
 Select Operator
 expressions: key (type: int), value (type: int), col_1 (type: string)
 outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2]
 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
 Map Join Operator
 condition map:
@@ -180,12 +266,27 @@ STAGE PLANS:
 keys:
 0 _col1 (type: int)
 1 _col1 (type: int)
+ Map Join Vectorization:
+ bigTableFilterExpressions: FilterLongColumnBetween(col 0:int, left 100, right 102)
+ bigTableKeyColumns: 1:int
+ bigTableRetainColumnNums: [0, 1, 2]
+ bigTableValueColumns: 0:int, 1:int, 2:string
+ className: VectorMapJoinOuterLongOperator
+ native: true
+ nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true
+ outerSmallTableKeyMapping: 1 -> 5
+ projectedOutput: 0:int, 1:int, 2:string, 4:int, 5:int, 6:string
+ smallTableValueMapping: 4:int, 6:string
+ hashTableImplementationType: OPTIMIZED
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
 input vertices:
 1 Map 2
 Statistics: Num rows: 6 Data size: 857 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
 Statistics: Num rows: 6 Data size: 857 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -193,26 +294,75 @@ STAGE PLANS:
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 Execution mode: vectorized, llap
 LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled]
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 3
+ includeColumns: [0, 1, 2]
+ dataColumns: key:int, value:int, col_1:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, bigint, string]
 Map 2
 Map Operator Tree:
 TableScan
 alias: test2
 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct]
 Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterLongColumnBetween(col 0:int, left 100, right 102)
 predicate: key BETWEEN 100 AND 102 (type: boolean)
 Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int), value (type: int), col_2 (type: string)
 outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2]
 Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col1 (type: int)
 sort order: +
 Map-reduce partition columns: _col1 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ keyColumns: 1:int
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumns: 0:int, 2:string
 Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: int), _col2 (type: string)
 Execution mode: vectorized, llap
 LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled]
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 3
+ includeColumns: [0, 1, 2]
+ dataColumns: key:int, value:int, col_2:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
 Stage: Stage-0
 Fetch Operator
@@ -238,6 +388,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@test1
 POSTHOOK: Input: default@test2
 #### A masked pattern was here ####
+test1.key test1.value test1.col_1 test2.key test2.value test2.col_2
 100 1 Bob NULL NULL NULL
 101 2 Car 102 2 Del
 98 NULL None NULL NULL NULL
@@ -245,18 +396,23 @@ POSTHOOK: Input: default@test2
 99 2 Mat NULL NULL NULL
 NULL NULL None NULL NULL NULL
 Warning: Map Join MAPJOIN[11][bigTable=?] in task 'Map 1' is a cross product
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.key between 100 and 102 AND test2.key between 100 and 102)
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.key between 100 and 102 AND test2.key between 100 and 102)
 POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
 Stage-1 is a root stage
 Stage-0 depends on stages: Stage-1
@@ -274,9 +430,16 @@ STAGE PLANS:
 TableScan
 alias: test1
 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct]
 Select Operator
 expressions: key (type: int), value (type: int), col_1 (type: string)
 outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2]
 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
 Map Join Operator
 condition map:
@@ -287,12 +450,21 @@ STAGE PLANS:
 keys:
 0
 1
+ Map Join Vectorization:
+ bigTableValueExpressions: col 0:int, col 1:int, col 2:string
+ className: VectorMapJoinOuterFilteredOperator
+ native: false
+ nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true
+ nativeConditionsNotMet: Outer Join has keys IS false
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
 input vertices:
 1 Map 2
 Statistics: Num rows: 6 Data size: 1142 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
 Statistics: Num rows: 6 Data size: 1142 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -300,24 +472,72 @@ STAGE PLANS:
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 Execution mode: vectorized, llap
 LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled]
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 3
+ includeColumns: [0, 1, 2]
+ dataColumns: key:int, value:int, col_1:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, bigint, string]
 Map 2
 Map Operator Tree:
 TableScan
 alias: test2
 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct]
 Filter Operator
+ Filter Vectorization:
+ className: VectorFilterOperator
+ native: true
+ predicateExpression: FilterLongColumnBetween(col 0:int, left 100, right 102)
 predicate: key BETWEEN 100 AND 102 (type: boolean)
 Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: int), value (type: int), col_2 (type: string)
 outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2]
 Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 sort order:
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkEmptyKeyOperator
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumns: 0:int, 1:int, 2:string
 Statistics: Num rows: 1 Data size: 95 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string)
 Execution mode: vectorized, llap
 LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled]
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 3
+ includeColumns: [0, 1, 2]
+ dataColumns: key:int, value:int, col_2:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
 Stage: Stage-0
 Fetch Operator
@@ -342,22 +562,28 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@test1
 POSTHOOK: Input: default@test2
 #### A masked pattern was here ####
+test1.key test1.value test1.col_1 test2.key test2.value test2.col_2
 100 1 Bob 102 2 Del
 101 2 Car 102 2 Del
 98 NULL None NULL NULL NULL
 99 0 Alice NULL NULL NULL
 99 2 Mat NULL NULL NULL
 NULL NULL None NULL NULL NULL
-PREHOOK: query: EXPLAIN
+PREHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT * FROM test1 RIGHT OUTER JOIN test2 ON (test1.value=test2.value AND true)
 PREHOOK: type: QUERY
-POSTHOOK: query: EXPLAIN
+POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL
 SELECT * FROM test1 RIGHT OUTER JOIN test2 ON (test1.value=test2.value AND true)
 POSTHOOK: type: QUERY
+Explain
+PLAN VECTORIZATION:
+ enabled: true
+ enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
+
 STAGE DEPENDENCIES:
 Stage-1 is a root stage
 Stage-0 depends on stages: Stage-1
@@ -375,26 +601,62 @@ STAGE PLANS:
 TableScan
 alias: test1
 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct]
 Select Operator
 expressions: key (type: int), value (type: int), col_1 (type: string)
 outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2]
 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col1 (type: int)
 sort order: +
 Map-reduce partition columns: _col1 (type: int)
+ Reduce Sink Vectorization:
+ className: VectorReduceSinkLongOperator
+ keyColumns: 1:int
+ native: true
+ nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
+ valueColumns: 0:int, 2:string
 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col0 (type: int), _col2 (type: string)
 Execution mode: vectorized, llap
 LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled]
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: true
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 3
+ includeColumns: [0, 1, 2]
+ dataColumns: key:int, value:int, col_1:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: []
 Map 2
 Map Operator Tree:
 TableScan
 alias: test2
 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
+ TableScan Vectorization:
+ native: true
+ vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct]
 Select Operator
 expressions: key (type: int), value (type: int), col_2 (type: string)
 outputColumnNames: _col0, _col1, _col2
+ Select Vectorization:
+ className: VectorSelectOperator
+ native: true
+ projectedOutputColumnNums: [0, 1, 2]
 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE
 Map Join Operator
 condition map:
@@ -402,12 +664,26 @@ STAGE PLANS:
 keys:
 0 _col1 (type: int)
 1 _col1 (type: int)
+ Map Join Vectorization:
+ bigTableKeyColumns: 1:int
+ bigTableRetainColumnNums: [0, 1, 2]
+ bigTableValueColumns: 0:int, 1:int, 2:string
+ className: VectorMapJoinOuterLongOperator
+ native: true
+ nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true
+ outerSmallTableKeyMapping: 1 -> 5
+ projectedOutput: 4:int, 5:int, 6:string, 0:int, 1:int, 2:string
+ smallTableValueMapping: 4:int, 6:string
+ hashTableImplementationType: OPTIMIZED
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
 input vertices:
 0 Map 1
 Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
+ File Sink Vectorization:
+ className: VectorFileSinkOperator
+ native: false
 Statistics: Num rows: 8 Data size: 1524 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -415,6 +691,22 @@ STAGE PLANS:
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 Execution mode: vectorized, llap
 LLAP IO: no inputs
+ Map Vectorization:
+ enabled: true
+ enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true
+ inputFormatFeatureSupport: [DECIMAL_64]
+ vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled]
+ featureSupportInUse: []
+ inputFileFormats: org.apache.hadoop.mapred.TextInputFormat
+ allNative: false
+ usesVectorUDFAdaptor: false
+ vectorized: true
+ rowBatchContext:
+ dataColumnCount: 3
+ includeColumns: [0, 1, 2]
+ dataColumns: key:int, value:int, col_2:string
+ partitionColumnCount: 0
+ scratchColumnTypeNames: [bigint, bigint, string]
 Stage: Stage-0
 Fetch Operator
@@ -436,6 +728,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@test1
 POSTHOOK: Input: default@test2
 #### A masked pattern was here ####
+test1.key
test1.value test1.col_1 test2.key test2.value test2.col_2 101 2 Car 102 2 Del 101 2 Car 103 2 Ema 99 2 Mat 102 2 Del @@ -443,16 +736,21 @@ POSTHOOK: Input: default@test2 NULL NULL NULL 104 3 Fli NULL NULL NULL 105 NULL None Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Map 1' is a cross product -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.key between 100 and 102) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.key between 100 and 102) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -470,9 +768,16 @@ STAGE PLANS: TableScan alias: test1 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: @@ -483,12 +788,21 @@ STAGE PLANS: keys: 0 1 + Map Join Vectorization: + bigTableValueExpressions: col 0:int, col 1:int, col 2:string + className: VectorMapJoinOuterFilteredOperator + native: false + nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nativeConditionsNotMet: Outer Join has keys IS false outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 input vertices: 1 Map 2 Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false + File Sink Vectorization: + className: VectorFileSinkOperator + native: false Statistics: Num rows: 24 Data size: 4580 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -496,21 +810,65 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: false + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [bigint, bigint, string] Map 2 Map Operator Tree: TableScan alias: test2 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) 
outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 1:int, 2:string Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -533,6 +891,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 100 1 Bob 102 2 Del 100 1 Bob 103 2 Ema 100 1 Bob 104 3 Fli @@ -546,20 +905,25 @@ POSTHOOK: Input: default@test2 99 2 Mat NULL NULL NULL NULL NULL None NULL NULL NULL Warning: Map Join MAPJOIN[9][bigTable=?] 
in task 'Map 1' is a cross product -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value OR test1.key between 100 and 102 OR test2.key between 100 and 102) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value OR test1.key between 100 and 102 OR test2.key between 100 and 102) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -601,21 +965,55 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: Unknown + vectorized: false Map 2 Map Operator Tree: TableScan alias: test2 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 1:int, 2:string Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -642,6 +1040,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 100 1 Bob 102 2 Del 100 1 Bob 103 2 Ema 100 1 Bob 104 3 Fli @@ -656,18 +1055,23 @@ POSTHOOK: Input: default@test2 99 2 Mat 103 2 Ema NULL NULL None 102 2 Del Warning: Map Join MAPJOIN[9][bigTable=?] 
in task 'Map 1' is a cross product -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value OR test1.key between 100 and 102) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value OR test1.key between 100 and 102) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -709,21 +1113,55 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: Unknown + vectorized: false Map 2 Map Operator Tree: TableScan alias: test2 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 1:int, 2:string Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -748,6 +1186,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 100 1 Bob 102 2 Del 100 1 Bob 103 2 Ema 100 1 Bob 104 3 Fli @@ -762,18 +1201,23 @@ POSTHOOK: Input: default@test2 99 2 Mat 103 2 Ema NULL NULL None NULL NULL NULL Warning: Map Join MAPJOIN[9][bigTable=?] 
in task 'Map 1' is a cross product -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value OR test2.key between 100 and 102) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value OR test2.key between 100 and 102) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -815,21 +1259,55 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: Unknown + vectorized: false Map 2 Map Operator Tree: TableScan alias: test2 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 1:int, 2:string Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -854,6 +1332,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 100 1 Bob 102 2 Del 101 2 Car 102 2 Del 101 2 Car 103 2 Ema @@ -862,20 +1341,25 @@ POSTHOOK: Input: default@test2 99 2 Mat 102 2 Del 99 2 Mat 103 2 Ema NULL NULL None 102 2 Del -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON (test1.value=test2.value AND (test1.key between 100 and 102 OR test2.key between 100 and 102)) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 LEFT OUTER JOIN test2 ON 
(test1.value=test2.value AND (test1.key between 100 and 102 OR test2.key between 100 and 102)) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -917,23 +1401,58 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: Unknown + vectorized: false Map 2 Map Operator Tree: TableScan alias: test2 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 2:string Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Stage: Stage-0 Fetch Operator @@ -959,6 +1478,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 100 1 Bob NULL NULL NULL 101 2 Car 102 2 Del 101 2 Car 103 2 Ema @@ -967,20 +1487,25 @@ POSTHOOK: Input: default@test2 99 2 Mat 102 2 Del NULL NULL None NULL NULL NULL Warning: Map Join MAPJOIN[9][bigTable=?] 
in task 'Map 2' is a cross product -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 RIGHT OUTER JOIN test2 ON (test1.value=test2.value OR test1.key between 100 and 102 OR test2.key between 100 and 102) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 RIGHT OUTER JOIN test2 ON (test1.value=test2.value OR test1.key between 100 and 102 OR test2.key between 100 and 102) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -998,16 +1523,44 @@ STAGE PLANS: TableScan alias: test1 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 1:int, 2:string Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 2 Map Operator Tree: TableScan @@ -1037,6 +1590,12 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: Unknown + vectorized: false Stage: Stage-0 Fetch Operator @@ -1063,6 +1622,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 100 1 Bob 102 2 Del 100 1 Bob 103 2 Ema 100 1 Bob 104 3 Fli @@ -1077,18 +1637,23 @@ POSTHOOK: Input: default@test2 99 2 Mat 103 2 Ema NULL NULL None 102 2 Del Warning: Map Join MAPJOIN[9][bigTable=?] 
in task 'Map 2' is a cross product -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 RIGHT OUTER JOIN test2 ON (test1.value=test2.value OR test1.key between 100 and 102) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 RIGHT OUTER JOIN test2 ON (test1.value=test2.value OR test1.key between 100 and 102) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1106,16 +1671,44 @@ STAGE PLANS: TableScan alias: test1 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 1:int, 2:string Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 2 Map Operator Tree: TableScan @@ -1145,6 +1738,12 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: Unknown + vectorized: false Stage: Stage-0 Fetch Operator @@ -1169,6 +1768,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 100 1 Bob 102 2 Del 100 1 Bob 103 2 Ema 100 1 Bob 104 3 Fli @@ -1180,18 +1780,23 @@ POSTHOOK: Input: default@test2 99 2 Mat 102 2 Del 99 2 Mat 103 2 Ema Warning: Map Join MAPJOIN[9][bigTable=?] 
in task 'Map 2' is a cross product -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 RIGHT OUTER JOIN test2 ON (test1.value=test2.value OR test2.key between 100 and 102) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 RIGHT OUTER JOIN test2 ON (test1.value=test2.value OR test2.key between 100 and 102) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1209,16 +1814,44 @@ STAGE PLANS: TableScan alias: test1 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 1:int, 2:string Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 2 Map Operator Tree: TableScan @@ -1248,6 +1881,12 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: Unknown + vectorized: false Stage: Stage-0 Fetch Operator @@ -1272,6 +1911,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 100 1 Bob 102 2 Del 101 2 Car 102 2 Del 101 2 Car 103 2 Ema @@ -1282,20 +1922,25 @@ POSTHOOK: Input: default@test2 NULL NULL NULL 104 3 Fli NULL NULL NULL 105 NULL None NULL NULL None 102 2 Del -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 RIGHT OUTER JOIN test2 ON (test1.value=test2.value AND (test1.key between 100 and 102 OR test2.key between 100 and 102)) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION 
DETAIL SELECT * FROM test1 RIGHT OUTER JOIN test2 ON (test1.value=test2.value AND (test1.key between 100 and 102 OR test2.key between 100 and 102)) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1313,18 +1958,47 @@ STAGE PLANS: TableScan alias: test1 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 2:string Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 2 Map Operator Tree: TableScan @@ -1354,6 +2028,12 @@ STAGE PLANS: serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + notVectorizedReason: Unknown + vectorized: false Stage: Stage-0 Fetch Operator @@ -1379,26 +2059,32 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 101 2 Car 102 2 Del 101 2 Car 103 2 Ema 99 2 Mat 102 2 Del NULL NULL NULL 104 3 Fli NULL NULL NULL 105 NULL None Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value OR test1.key between 100 and 102 OR test2.key between 100 and 102) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value OR test1.key between 100 and 102 OR test2.key between 100 and 102) POSTHOOK: type: QUERY 
+Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1416,37 +2102,93 @@ STAGE PLANS: TableScan alias: test1 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 1:int, 2:string Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan alias: test2 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 1:int, 2:string Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + 
usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1460,6 +2202,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Stage: Stage-0 Fetch Operator @@ -1486,6 +2231,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 100 1 Bob 102 2 Del 100 1 Bob 103 2 Ema 100 1 Bob 104 3 Fli @@ -1500,18 +2246,23 @@ POSTHOOK: Input: default@test2 99 2 Mat 103 2 Ema NULL NULL None 102 2 Del Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value OR test1.key between 100 and 102) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value OR test1.key between 100 and 102) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1529,37 +2280,93 @@ STAGE PLANS: TableScan alias: test1 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 1:int, 2:string Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + 
partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan alias: test2 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 1:int, 2:string Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1573,6 +2380,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Stage: Stage-0 Fetch Operator @@ -1597,6 +2407,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 100 1 Bob 102 2 Del 100 1 Bob 103 2 Ema 100 1 Bob 104 3 Fli @@ -1611,18 +2422,23 @@ POSTHOOK: Input: default@test2 99 2 Mat 103 2 Ema NULL NULL None NULL NULL NULL Warning: Shuffle Join MERGEJOIN[9][tables = [$hdt$_0, $hdt$_1]] in Stage 'Reducer 2' is a cross product -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value OR test2.key between 100 and 102) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value OR test2.key between 100 and 102) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1640,37 +2456,93 @@ STAGE PLANS: TableScan alias: test1 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: 
COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 1:int, 2:string Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan alias: test2 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_2:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: + Reduce Sink Vectorization: + className: VectorReduceSinkEmptyKeyOperator + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 1:int, 2:string Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + 
Full Outer Join 0 to 1 keys: 0 1 @@ -1684,6 +2556,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Stage: Stage-0 Fetch Operator @@ -1708,6 +2583,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 100 1 Bob 102 2 Del 101 2 Car 102 2 Del 101 2 Car 103 2 Ema @@ -1718,20 +2594,25 @@ POSTHOOK: Input: default@test2 NULL NULL NULL 104 3 Fli NULL NULL NULL 105 NULL None NULL NULL None 102 2 Del -PREHOOK: query: EXPLAIN +PREHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value AND (test1.key between 100 and 102 OR test2.key between 100 and 102)) PREHOOK: type: QUERY -POSTHOOK: query: EXPLAIN +POSTHOOK: query: EXPLAIN VECTORIZATION DETAIL SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value AND (test1.key between 100 and 102 OR test2.key between 100 and 102)) POSTHOOK: type: QUERY +Explain +PLAN VECTORIZATION: + enabled: true + enabledConditionsMet: [hive.vectorized.execution.enabled IS true] + STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 @@ -1749,41 +2630,99 @@ STAGE PLANS: TableScan alias: test1 Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 1:value:int, 2:col_1:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_1 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 2:string Statistics: Num rows: 6 Data size: 572 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_1:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Map 3 Map Operator Tree: TableScan alias: test2 Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE + TableScan Vectorization: + native: true + vectorizationSchemaColumns: [0:key:int, 
1:value:int, 2:col_2:string, 3:ROW__ID:struct] Select Operator expressions: key (type: int), value (type: int), col_2 (type: string) outputColumnNames: _col0, _col1, _col2 + Select Vectorization: + className: VectorSelectOperator + native: true + projectedOutputColumnNums: [0, 1, 2] Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: int) sort order: + Map-reduce partition columns: _col1 (type: int) + Reduce Sink Vectorization: + className: VectorReduceSinkLongOperator + keyColumns: 1:int + native: true + nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true + valueColumns: 0:int, 2:string Statistics: Num rows: 4 Data size: 380 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int), _col2 (type: string) Execution mode: vectorized, llap LLAP IO: no inputs + Map Vectorization: + enabled: true + enabledConditionsMet: hive.vectorized.use.vector.serde.deserialize IS true + inputFormatFeatureSupport: [DECIMAL_64] + vectorizationSupportRemovedReasons: [DECIMAL_64 disabled because LLAP is enabled] + featureSupportInUse: [] + inputFileFormats: org.apache.hadoop.mapred.TextInputFormat + allNative: true + usesVectorUDFAdaptor: false + vectorized: true + rowBatchContext: + dataColumnCount: 3 + includeColumns: [0, 1, 2] + dataColumns: key:int, value:int, col_2:string + partitionColumnCount: 0 + scratchColumnTypeNames: [] Reducer 2 Execution mode: llap Reduce Operator Tree: Merge Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col1 (type: int) 1 _col1 (type: int) @@ -1797,6 +2736,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Stage: Stage-0 Fetch Operator @@ -1822,6 +2764,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### +test1.key test1.value test1.col_1 test2.key test2.value test2.col_2 100 1 Bob NULL NULL NULL 101 2 Car 102 2 Del 101 2 Car 103 2 Ema diff --git ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out index 0dff57a..e8b41ce 100644 --- ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_mapjoin.q.out @@ -55,6 +55,7 @@ STAGE PLANS: className: VectorMapJoinInnerBigOnlyLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1 input vertices: 1 Map 3 diff --git ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out index f05e5c0..c1a41b6 100644 --- ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out +++ 
ql/src/test/results/clientpositive/llap/vectorized_mapjoin3.q.out @@ -145,13 +145,15 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [1] - bigTableRetainedColumnNums: [0] - bigTableValueColumnNums: [0] + bigTableKeyColumns: 1:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:decimal(8,1) className: VectorMapJoinInnerBigOnlyLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:decimal(8,1) + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0 input vertices: 1 Reducer 3 @@ -215,10 +217,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:int Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: vectorized, llap @@ -279,10 +280,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 @@ -348,13 +348,15 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [1] - bigTableRetainedColumnNums: [0] - bigTableValueColumnNums: [0] + bigTableKeyColumns: 1:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:decimal(8,1) className: VectorMapJoinInnerBigOnlyLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:decimal(8,1) + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0 input vertices: 1 Reducer 3 @@ -418,10 +420,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:int Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: vectorized, llap @@ 
-482,10 +483,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 @@ -551,13 +551,15 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [1] - bigTableRetainedColumnNums: [0] - bigTableValueColumnNums: [0] + bigTableKeyColumns: 1:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:decimal(8,1) className: VectorMapJoinInnerBigOnlyLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:decimal(8,1) + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0 input vertices: 1 Reducer 3 @@ -621,10 +623,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:int Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: int) Execution mode: vectorized, llap @@ -685,10 +686,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 4 Basic stats: COMPLETE Column stats: COMPLETE Stage: Stage-0 diff --git ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out index ccf9aae..4fc70b9 100644 --- ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_ptf.q.out @@ -155,11 +155,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE 
Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -376,10 +376,10 @@ STAGE PLANS: Map-reduce partition columns: p_partkey (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 5] + valueColumns: 1:string, 2:string, 5:int Statistics: Num rows: 26 Data size: 5902 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) Execution mode: vectorized, llap @@ -420,10 +420,9 @@ STAGE PLANS: Map-reduce partition columns: p_partkey (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -459,6 +458,9 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 27 Data size: 6021 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: int) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: llap Reduce Vectorization: @@ -628,11 +630,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -787,11 +789,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -1006,11 +1008,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: 
[2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -1228,11 +1230,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -1452,11 +1454,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [0, 3, 4, 5, 6, 7, 8] + partitionColumns: 2:string + valueColumns: 0:int, 3:string, 4:string, 5:int, 6:string, 7:double, 8:string Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_partkey (type: int), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) Execution mode: vectorized, llap @@ -1497,10 +1499,9 @@ STAGE PLANS: Map-reduce partition columns: p_partkey (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1572,6 +1573,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Stage: Stage-0 Fetch Operator @@ -1675,10 +1679,9 @@ STAGE PLANS: Map-reduce partition columns: p_partkey (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS 
true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -1711,11 +1714,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [0, 3, 4, 5, 6, 7, 8] + partitionColumns: 2:string + valueColumns: 0:int, 3:string, 4:string, 5:int, 6:string, 7:double, 8:string Statistics: Num rows: 26 Data size: 16094 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_partkey (type: int), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) Execution mode: vectorized, llap @@ -1757,6 +1760,9 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 4 Execution mode: llap Reduce Vectorization: @@ -2295,11 +2301,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -2513,11 +2519,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -2801,11 +2807,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -3023,11 +3029,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [0, 5, 7] + partitionColumns: 2:string + valueColumns: 0:int, 5:int, 7:double Statistics: Num rows: 26 Data size: 6110 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_partkey (type: int), p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -3068,10 +3074,9 @@ STAGE PLANS: Map-reduce partition columns: p_partkey (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 26 Data size: 104 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs @@ -3142,6 +3147,9 @@ STAGE PLANS: Map-reduce partition columns: _col2 (type: string) Statistics: Num rows: 27 Data size: 6237 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: int), _col7 (type: double) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 4 Execution mode: llap Reduce Vectorization: @@ -3316,11 +3324,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -3554,11 +3562,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS 
true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [2] + partitionColumns: 0:string + valueColumns: 2:double Statistics: Num rows: 13 Data size: 2574 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: double) Execution mode: vectorized, llap @@ -3817,11 +3825,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 6006 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized, llap @@ -4334,10 +4342,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [2] + keyColumns: 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 5] + valueColumns: 1:string, 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_name (type: string), p_size (type: int) Execution mode: vectorized, llap @@ -4652,10 +4660,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [2] + keyColumns: 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 5] + valueColumns: 1:string, 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_name (type: string), p_size (type: int) Execution mode: vectorized, llap @@ -4966,10 +4974,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [5] + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -5288,10 +5296,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [5] + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -5621,10 +5629,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [5] + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap @@ -5923,10 +5931,10 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string), p_name (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [5] + valueColumns: 5:int Statistics: Num rows: 26 Data size: 5798 Basic stats: COMPLETE Column stats: COMPLETE value expressions: p_size (type: int) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/llap/vectorized_shufflejoin.q.out ql/src/test/results/clientpositive/llap/vectorized_shufflejoin.q.out index 72216d3..e075bf7 100644 --- ql/src/test/results/clientpositive/llap/vectorized_shufflejoin.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_shufflejoin.q.out @@ -132,6 +132,9 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 92 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: bigint), _col1 (type: int), _col2 (type: int), _col3 (type: struct) + MergeJoin Vectorization: + enabled: false + enableConditionsNotMet: Vectorizing MergeJoin Supported IS false Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: diff --git ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out index 3fb968f..3b36498 100644 --- ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_timestamp.q.out @@ -155,10 +155,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1] + valueColumns: 0:timestamp, 1:timestamp Statistics: Num rows: 1 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: timestamp), _col1 (type: timestamp) Execution mode: vectorized, llap @@ -380,10 +379,9 @@ STAGE PLANS: sort order: Reduce Sink 
Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:struct Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: struct) Execution mode: vectorized, llap @@ -520,10 +518,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4, 5, 6] + valueColumns: 0:struct, 1:struct, 2:struct, 3:struct, 4:struct, 5:struct, 6:struct Statistics: Num rows: 1 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) Execution mode: vectorized, llap diff --git ql/src/test/results/clientpositive/mapjoin46.q.out ql/src/test/results/clientpositive/mapjoin46.q.out index 61b579a..5a589cc 100644 --- ql/src/test/results/clientpositive/mapjoin46.q.out +++ ql/src/test/results/clientpositive/mapjoin46.q.out @@ -124,14 +124,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -NULL NULL None NULL NULL NULL +100 1 Bob NULL NULL NULL +101 2 Car 102 2 Del +101 2 Car 103 2 Ema 98 NULL None NULL NULL NULL 99 0 Alice NULL NULL NULL 99 2 Mat 102 2 Del 99 2 Mat 103 2 Ema -100 1 Bob NULL NULL NULL -101 2 Car 102 2 Del -101 2 Car 103 2 Ema +NULL NULL None NULL NULL NULL PREHOOK: query: EXPLAIN SELECT * FROM test1 LEFT OUTER JOIN test2 @@ -233,12 +233,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -NULL NULL None NULL NULL NULL +100 1 Bob NULL NULL NULL +101 2 Car 102 2 Del 98 NULL None NULL NULL NULL 99 0 Alice NULL NULL NULL 99 2 Mat NULL NULL NULL -100 1 Bob NULL NULL NULL -101 2 Car 102 2 Del +NULL NULL None NULL NULL NULL Warning: Map Join MAPJOIN[11][bigTable=?] in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -338,12 +338,12 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -NULL NULL None NULL NULL NULL +100 1 Bob 102 2 Del +101 2 Car 102 2 Del 98 NULL None NULL NULL NULL 99 0 Alice NULL NULL NULL 99 2 Mat NULL NULL NULL -100 1 Bob 102 2 Del -101 2 Car 102 2 Del +NULL NULL None NULL NULL NULL PREHOOK: query: EXPLAIN SELECT * FROM test1 RIGHT OUTER JOIN test2 @@ -428,10 +428,10 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -99 2 Mat 102 2 Del 101 2 Car 102 2 Del -99 2 Mat 103 2 Ema 101 2 Car 103 2 Ema +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema NULL NULL NULL 104 3 Fli NULL NULL NULL 105 NULL None Warning: Map Join MAPJOIN[9][bigTable=?] 
in task 'Stage-3:MAPRED' is a cross product @@ -526,10 +526,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -NULL NULL None NULL NULL NULL -98 NULL None NULL NULL NULL -99 0 Alice NULL NULL NULL -99 2 Mat NULL NULL NULL 100 1 Bob 102 2 Del 100 1 Bob 103 2 Ema 100 1 Bob 104 3 Fli @@ -538,6 +534,10 @@ NULL NULL None NULL NULL NULL 101 2 Car 103 2 Ema 101 2 Car 104 3 Fli 101 2 Car 105 NULL None +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +99 2 Mat NULL NULL NULL +NULL NULL None NULL NULL NULL Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -633,11 +633,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -NULL NULL None 102 2 Del -98 NULL None 102 2 Del -99 0 Alice 102 2 Del -99 2 Mat 102 2 Del -99 2 Mat 103 2 Ema 100 1 Bob 102 2 Del 100 1 Bob 103 2 Ema 100 1 Bob 104 3 Fli @@ -646,6 +641,11 @@ NULL NULL None 102 2 Del 101 2 Car 103 2 Ema 101 2 Car 104 3 Fli 101 2 Car 105 NULL None +98 NULL None 102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL None 102 2 Del Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -737,11 +737,6 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -NULL NULL None NULL NULL NULL -98 NULL None NULL NULL NULL -99 0 Alice NULL NULL NULL -99 2 Mat 102 2 Del -99 2 Mat 103 2 Ema 100 1 Bob 102 2 Del 100 1 Bob 103 2 Ema 100 1 Bob 104 3 Fli @@ -750,6 +745,11 @@ NULL NULL None NULL NULL NULL 101 2 Car 103 2 Ema 101 2 Car 104 3 Fli 101 2 Car 105 NULL None +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL None NULL NULL NULL Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -841,14 +841,14 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -NULL NULL None 102 2 Del +100 1 Bob 102 2 Del +101 2 Car 102 2 Del +101 2 Car 103 2 Ema 98 NULL None 102 2 Del 99 0 Alice 102 2 Del 99 2 Mat 102 2 Del 99 2 Mat 103 2 Ema -100 1 Bob 102 2 Del -101 2 Car 102 2 Del -101 2 Car 103 2 Ema +NULL NULL None 102 2 Del PREHOOK: query: EXPLAIN SELECT * FROM test1 LEFT OUTER JOIN test2 @@ -942,13 +942,13 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -NULL NULL None NULL NULL NULL -98 NULL None NULL NULL NULL -99 0 Alice NULL NULL NULL -99 2 Mat 102 2 Del 100 1 Bob NULL NULL NULL 101 2 Car 102 2 Del 101 2 Car 103 2 Ema +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +99 2 Mat 102 2 Del +NULL NULL None NULL NULL NULL Warning: Map Join MAPJOIN[9][bigTable=?] 
in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -1044,19 +1044,19 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -NULL NULL None 102 2 Del -98 NULL None 102 2 Del -99 0 Alice 102 2 Del -99 2 Mat 102 2 Del 100 1 Bob 102 2 Del -101 2 Car 102 2 Del -99 2 Mat 103 2 Ema 100 1 Bob 103 2 Ema -101 2 Car 103 2 Ema 100 1 Bob 104 3 Fli -101 2 Car 104 3 Fli 100 1 Bob 105 NULL None +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +101 2 Car 104 3 Fli 101 2 Car 105 NULL None +98 NULL None 102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL None 102 2 Del Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -1148,16 +1148,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -99 2 Mat 102 2 Del 100 1 Bob 102 2 Del -101 2 Car 102 2 Del -99 2 Mat 103 2 Ema 100 1 Bob 103 2 Ema -101 2 Car 103 2 Ema 100 1 Bob 104 3 Fli -101 2 Car 104 3 Fli 100 1 Bob 105 NULL None +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +101 2 Car 104 3 Fli 101 2 Car 105 NULL None +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema Warning: Map Join MAPJOIN[9][bigTable=?] in task 'Stage-3:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * @@ -1249,16 +1249,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -NULL NULL None 102 2 Del +100 1 Bob 102 2 Del +101 2 Car 102 2 Del +101 2 Car 103 2 Ema 98 NULL None 102 2 Del 99 0 Alice 102 2 Del 99 2 Mat 102 2 Del -100 1 Bob 102 2 Del -101 2 Car 102 2 Del 99 2 Mat 103 2 Ema -101 2 Car 103 2 Ema NULL NULL NULL 104 3 Fli NULL NULL NULL 105 NULL None +NULL NULL None 102 2 Del PREHOOK: query: EXPLAIN SELECT * FROM test1 RIGHT OUTER JOIN test2 @@ -1352,9 +1352,9 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -99 2 Mat 102 2 Del 101 2 Car 102 2 Del 101 2 Car 103 2 Ema +99 2 Mat 102 2 Del NULL NULL NULL 104 3 Fli NULL NULL NULL 105 NULL None Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product @@ -1405,7 +1405,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1445,31 +1445,33 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -101 2 Car 105 NULL None -101 2 Car 104 3 Fli -101 2 Car 103 2 Ema -101 2 Car 102 2 Del -100 1 Bob 105 NULL None -100 1 Bob 104 3 Fli -100 1 Bob 103 2 Ema 100 1 Bob 102 2 Del -99 2 Mat 103 2 Ema -99 2 Mat 102 2 Del -99 0 Alice 102 2 Del +100 1 Bob 103 2 Ema +100 1 Bob 104 3 Fli +100 1 Bob 105 NULL None +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +101 2 Car 104 3 Fli +101 2 Car 105 NULL None 98 NULL None 102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema NULL NULL None 102 2 Del Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - OR test1.key between 100 and 102) + OR test1.key between 100 and 102 + OR test2.key between 100 and 102) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - OR test1.key between 100 and 102) + OR test1.key between 100 and 
102 + OR test2.key between 100 and 102) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1504,12 +1506,12 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)} + residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -1529,7 +1531,8 @@ Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAP PREHOOK: query: SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - OR test1.key between 100 and 102) + OR test1.key between 100 and 102 + OR test2.key between 100 and 102) PREHOOK: type: QUERY PREHOOK: Input: default@test1 PREHOOK: Input: default@test2 @@ -1537,36 +1540,37 @@ PREHOOK: Input: default@test2 POSTHOOK: query: SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - OR test1.key between 100 and 102) + OR test1.key between 100 and 102 + OR test2.key between 100 and 102) POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -101 2 Car 105 NULL None -101 2 Car 104 3 Fli -101 2 Car 103 2 Ema -101 2 Car 102 2 Del -100 1 Bob 105 NULL None -100 1 Bob 104 3 Fli -100 1 Bob 103 2 Ema 100 1 Bob 102 2 Del -99 2 Mat 103 2 Ema +100 1 Bob 103 2 Ema +100 1 Bob 104 3 Fli +100 1 Bob 105 NULL None +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +101 2 Car 104 3 Fli +101 2 Car 105 NULL None +98 NULL None 102 2 Del +99 0 Alice 102 2 Del 99 2 Mat 102 2 Del -99 0 Alice NULL NULL NULL -98 NULL None NULL NULL NULL -NULL NULL None NULL NULL NULL +99 2 Mat 103 2 Ema +NULL NULL None 102 2 Del Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - OR test2.key between 100 and 102) + OR test1.key between 100 and 102) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - OR test2.key between 100 and 102) + OR test1.key between 100 and 102) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1601,12 +1605,12 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)} + residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)} Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false @@ -1626,7 +1630,7 @@ Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAP PREHOOK: query: SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - OR test2.key between 100 and 102) + OR test1.key between 100 and 102) PREHOOK: type: QUERY PREHOOK: Input: default@test1 PREHOOK: Input: default@test2 @@ -1634,34 +1638,36 @@ PREHOOK: Input: default@test2 POSTHOOK: query: SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - OR test2.key between 100 and 102) + OR test1.key between 100 and 102) POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 
#### A masked pattern was here #### -101 2 Car 103 2 Ema -101 2 Car 102 2 Del 100 1 Bob 102 2 Del -99 2 Mat 103 2 Ema +100 1 Bob 103 2 Ema +100 1 Bob 104 3 Fli +100 1 Bob 105 NULL None +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +101 2 Car 104 3 Fli +101 2 Car 105 NULL None +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL 99 2 Mat 102 2 Del -99 0 Alice 102 2 Del -98 NULL None 102 2 Del -NULL NULL None 102 2 Del -NULL NULL NULL 105 NULL None -NULL NULL NULL 104 3 Fli +99 2 Mat 103 2 Ema +NULL NULL None NULL NULL NULL +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - AND (test1.key between 100 and 102 - OR test2.key between 100 and 102)) + OR test1.key between 100 and 102) PREHOOK: type: QUERY POSTHOOK: query: EXPLAIN SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - AND (test1.key between 100 and 102 - OR test2.key between 100 and 102)) + OR test1.key between 100 and 102) POSTHOOK: type: QUERY STAGE DEPENDENCIES: Stage-1 is a root stage @@ -1679,11 +1685,9 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) + sort order: Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) TableScan alias: test2 Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE @@ -1692,24 +1696,22 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator - key expressions: _col1 (type: int) - sort order: + - Map-reduce partition columns: _col1 (type: int) + sort order: Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col2 (type: string) + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: - 0 _col1 (type: int) - 1 _col1 (type: int) + 0 + 1 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + residual filter predicates: {((_col1 = _col4) or _col0 BETWEEN 100 AND 102)} + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1721,11 +1723,11 @@ STAGE PLANS: Processor Tree: ListSink +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - AND (test1.key between 100 and 102 - OR test2.key between 100 and 102)) + OR test1.key between 100 and 102) PREHOOK: type: QUERY PREHOOK: Input: 
default@test1 PREHOOK: Input: default@test2 @@ -1733,37 +1735,426 @@ PREHOOK: Input: default@test2 POSTHOOK: query: SELECT * FROM test1 FULL OUTER JOIN test2 ON (test1.value=test2.value - AND (test1.key between 100 and 102 - OR test2.key between 100 and 102)) + OR test1.key between 100 and 102) POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### +100 1 Bob 102 2 Del +100 1 Bob 103 2 Ema +100 1 Bob 104 3 Fli +100 1 Bob 105 NULL None +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +101 2 Car 104 3 Fli +101 2 Car 105 NULL None 98 NULL None NULL NULL NULL -NULL NULL None NULL NULL NULL -NULL NULL NULL 105 NULL None 99 0 Alice NULL NULL NULL -100 1 Bob NULL NULL NULL -101 2 Car 103 2 Ema -101 2 Car 102 2 Del 99 2 Mat 102 2 Del -NULL NULL NULL 104 3 Fli -Warning: Shuffle Join JOIN[17][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +99 2 Mat 103 2 Ema +NULL NULL None NULL NULL NULL +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product PREHOOK: query: EXPLAIN SELECT * -FROM ( - SELECT test1.key AS key1, test1.value AS value1, test1.col_1 AS col_1, - test2.key AS key2, test2.value AS value2, test2.col_2 AS col_2 - FROM test1 RIGHT OUTER JOIN test2 - ON (test1.value=test2.value - AND (test1.key between 100 and 102 - OR test2.key between 100 and 102)) - ) sq1 -FULL OUTER JOIN ( - SELECT test1.key AS key3, test1.value AS value3, test1.col_1 AS col_3, - test2.key AS key4, test2.value AS value4, test2.col_2 AS col_4 - FROM test1 LEFT OUTER JOIN test2 - ON (test1.value=test2.value +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: 
NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +100 1 Bob 102 2 Del +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +98 NULL None 102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL NULL 104 3 Fli +NULL NULL NULL 105 NULL None +NULL NULL None 102 2 Del +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 'Stage-1:MAPRED' is a cross product +PREHOOK: query: EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {((_col1 = _col4) or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 24 Data size: 476 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Stage 
'Stage-1:MAPRED' is a cross product +PREHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + OR test2.key between 100 and 102) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +100 1 Bob 102 2 Del +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +98 NULL None 102 2 Del +99 0 Alice 102 2 Del +99 2 Mat 102 2 Del +99 2 Mat 103 2 Ema +NULL NULL NULL 104 3 Fli +NULL NULL NULL 105 NULL None +NULL NULL None 102 2 Del +PREHOOK: query: EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string) + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### 
+POSTHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### +100 1 Bob NULL NULL NULL +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +99 2 Mat 102 2 Del +NULL NULL NULL 104 3 Fli +NULL NULL NULL 105 NULL None +NULL NULL None NULL NULL NULL +PREHOOK: query: EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string) + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col1 (type: int) + sort order: + + Map-reduce partition columns: _col1 (type: int) + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col2 (type: string) + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM test1 FULL OUTER JOIN test2 +ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 
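Worth noting before the expected rows below: NULL join keys never compare equal, so test1's (98, NULL, 'None') and test2's (105, NULL, 'None') each survive only as a NULL-padded row from their own side rather than matching each other. A simplified sketch (plain equi-join, residual predicate dropped) that isolates the padded rows:

  -- Matched rows require a non-NULL value on both sides, so this WHERE
  -- clause selects exactly the unmatched, NULL-padded rows.
  SELECT t1.key, t1.col_1, t2.key, t2.col_2
  FROM test1 t1 FULL OUTER JOIN test2 t2 ON t1.value = t2.value
  WHERE t1.value IS NULL OR t2.value IS NULL;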
+#### A masked pattern was here #### +100 1 Bob NULL NULL NULL +101 2 Car 102 2 Del +101 2 Car 103 2 Ema +98 NULL None NULL NULL NULL +99 0 Alice NULL NULL NULL +99 2 Mat 102 2 Del +NULL NULL NULL 104 3 Fli +NULL NULL NULL 105 NULL None +NULL NULL None NULL NULL NULL +Warning: Shuffle Join JOIN[17][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: EXPLAIN +SELECT * +FROM ( + SELECT test1.key AS key1, test1.value AS value1, test1.col_1 AS col_1, + test2.key AS key2, test2.value AS value2, test2.col_2 AS col_2 + FROM test1 RIGHT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq1 +FULL OUTER JOIN ( + SELECT test1.key AS key3, test1.value AS value3, test1.col_1 AS col_3, + test2.key AS key4, test2.value AS value4, test2.col_2 AS col_4 + FROM test1 LEFT OUTER JOIN test2 + ON (test1.value=test2.value AND (test1.key between 100 and 102 OR test2.key between 100 and 102)) ) sq2 @@ -1878,7 +2269,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1944,23 +2335,239 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@test1 POSTHOOK: Input: default@test2 #### A masked pattern was here #### -NULL NULL NULL 105 NULL None 101 2 Car 103 2 Ema -NULL NULL NULL 105 NULL None 101 2 Car 102 2 Del -NULL NULL NULL 105 NULL None 100 1 Bob NULL NULL NULL -NULL NULL NULL 105 NULL None 99 2 Mat 102 2 Del -NULL NULL NULL 105 NULL None 99 0 Alice NULL NULL NULL -NULL NULL NULL 105 NULL None 98 NULL None NULL NULL NULL -NULL NULL NULL 105 NULL None NULL NULL None NULL NULL NULL -NULL NULL NULL 104 3 Fli 101 2 Car 103 2 Ema -NULL NULL NULL 104 3 Fli 101 2 Car 102 2 Del +101 2 Car 102 2 Del 100 1 Bob NULL NULL NULL +101 2 Car 102 2 Del 99 0 Alice NULL NULL NULL +101 2 Car 103 2 Ema 100 1 Bob NULL NULL NULL +101 2 Car 103 2 Ema 99 0 Alice NULL NULL NULL +99 2 Mat 102 2 Del 100 1 Bob NULL NULL NULL +99 2 Mat 102 2 Del 99 0 Alice NULL NULL NULL NULL NULL NULL 104 3 Fli 100 1 Bob NULL NULL NULL -NULL NULL NULL 104 3 Fli 99 2 Mat 102 2 Del -NULL NULL NULL 104 3 Fli 99 0 Alice NULL NULL NULL +NULL NULL NULL 104 3 Fli 101 2 Car 102 2 Del +NULL NULL NULL 104 3 Fli 101 2 Car 103 2 Ema NULL NULL NULL 104 3 Fli 98 NULL None NULL NULL NULL +NULL NULL NULL 104 3 Fli 99 0 Alice NULL NULL NULL +NULL NULL NULL 104 3 Fli 99 2 Mat 102 2 Del NULL NULL NULL 104 3 Fli NULL NULL None NULL NULL NULL -101 2 Car 103 2 Ema 100 1 Bob NULL NULL NULL -101 2 Car 103 2 Ema 99 0 Alice NULL NULL NULL +NULL NULL NULL 105 NULL None 100 1 Bob NULL NULL NULL +NULL NULL NULL 105 NULL None 101 2 Car 102 2 Del +NULL NULL NULL 105 NULL None 101 2 Car 103 2 Ema +NULL NULL NULL 105 NULL None 98 NULL None NULL NULL NULL +NULL NULL NULL 105 NULL None 99 0 Alice NULL NULL NULL +NULL NULL NULL 105 NULL None 99 2 Mat 102 2 Del +NULL NULL NULL 105 NULL None NULL NULL None NULL NULL NULL +Warning: Shuffle Join JOIN[17][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: EXPLAIN +SELECT * +FROM ( + SELECT test1.key AS key1, test1.value AS value1, test1.col_1 AS col_1, + test2.key AS key2, test2.value AS value2, test2.col_2 AS col_2 + FROM test1 RIGHT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq1 +FULL OUTER JOIN ( + SELECT test1.key AS key3, test1.value AS value3, test1.col_1 AS col_3, + test2.key AS key4, test2.value AS value4, test2.col_2 AS 
col_4 + FROM test1 LEFT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq2 +ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT * +FROM ( + SELECT test1.key AS key1, test1.value AS value1, test1.col_1 AS col_1, + test2.key AS key2, test2.value AS value2, test2.col_2 AS col_2 + FROM test1 RIGHT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq1 +FULL OUTER JOIN ( + SELECT test1.key AS key3, test1.value AS value3, test1.col_1 AS col_3, + test2.key AS key4, test2.value AS value4, test2.col_2 AS col_4 + FROM test1 LEFT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq2 +ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-8 is a root stage + Stage-2 depends on stages: Stage-8 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + $hdt$_0:test1 + Fetch Operator + limit: -1 + $hdt$_2:$hdt$_3:test2 + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + $hdt$_0:test1 + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + $hdt$_2:$hdt$_3:test2 + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + HashTable Sink Operator + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + alias: test2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_2 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 4 Data size: 38 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Right Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) + TableScan + alias: test1 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: key (type: int), value (type: int), col_1 (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 6 Data size: 56 Basic stats: COMPLETE Column stats: NONE + Map Join Operator + condition map: + Left Outer Join 0 to 1 + keys: + 0 _col1 (type: int) + 
1 _col1 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + residual filter predicates: {(_col0 BETWEEN 100 AND 102 or _col3 BETWEEN 100 AND 102)} + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 6 Data size: 61 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int), _col5 (type: string) + Local Work: + Map Reduce Local Work + Reduce Operator Tree: + Join Operator + condition map: + Full Outer Join 0 to 1 + keys: + 0 + 1 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 + residual filter predicates: {(_col1 is null or (_col10 is null and (_col7 <> _col4)))} + Statistics: Num rows: 36 Data size: 768 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 36 Data size: 768 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +Warning: Shuffle Join JOIN[17][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Stage 'Stage-2:MAPRED' is a cross product +PREHOOK: query: SELECT * +FROM ( + SELECT test1.key AS key1, test1.value AS value1, test1.col_1 AS col_1, + test2.key AS key2, test2.value AS value2, test2.col_2 AS col_2 + FROM test1 RIGHT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq1 +FULL OUTER JOIN ( + SELECT test1.key AS key3, test1.value AS value3, test1.col_1 AS col_3, + test2.key AS key4, test2.value AS value4, test2.col_2 AS col_4 + FROM test1 LEFT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq2 +ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2) +PREHOOK: type: QUERY +PREHOOK: Input: default@test1 +PREHOOK: Input: default@test2 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * +FROM ( + SELECT test1.key AS key1, test1.value AS value1, test1.col_1 AS col_1, + test2.key AS key2, test2.value AS value2, test2.col_2 AS col_2 + FROM test1 RIGHT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq1 +FULL OUTER JOIN ( + SELECT test1.key AS key3, test1.value AS value3, test1.col_1 AS col_3, + test2.key AS key4, test2.value AS value4, test2.col_2 AS col_4 + FROM test1 LEFT OUTER JOIN test2 + ON (test1.value=test2.value + AND (test1.key between 100 and 102 + OR test2.key between 100 and 102)) + ) sq2 +ON (sq1.value1 is null or sq2.value4 is null and sq2.value3 != sq1.value2) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@test1 +POSTHOOK: Input: default@test2 +#### A masked pattern was here #### 101 2 Car 102 2 Del 100 1 Bob NULL NULL NULL 101 2 Car 102 2 Del 99 0 Alice NULL NULL NULL +101 2 Car 103 2 Ema 100 1 Bob NULL NULL NULL +101 2 Car 103 2 Ema 99 0 Alice NULL NULL NULL 99 2 Mat 102 2 Del 100 1 Bob NULL NULL NULL 99 2 Mat 102 2 Del 99 0 Alice NULL NULL NULL +NULL NULL NULL 104 3 Fli 100 1 Bob NULL NULL NULL +NULL NULL NULL 104 3 Fli 101 2 Car 102 2 Del +NULL NULL NULL 104 3 Fli 101 2 Car 103 2 Ema +NULL NULL NULL 104 3 Fli 98 NULL 
None NULL NULL NULL +NULL NULL NULL 104 3 Fli 99 0 Alice NULL NULL NULL +NULL NULL NULL 104 3 Fli 99 2 Mat 102 2 Del +NULL NULL NULL 104 3 Fli NULL NULL None NULL NULL NULL +NULL NULL NULL 105 NULL None 100 1 Bob NULL NULL NULL +NULL NULL NULL 105 NULL None 101 2 Car 102 2 Del +NULL NULL NULL 105 NULL None 101 2 Car 103 2 Ema +NULL NULL NULL 105 NULL None 98 NULL None NULL NULL NULL +NULL NULL NULL 105 NULL None 99 0 Alice NULL NULL NULL +NULL NULL NULL 105 NULL None 99 2 Mat 102 2 Del +NULL NULL NULL 105 NULL None NULL NULL None NULL NULL NULL diff --git ql/src/test/results/clientpositive/mapjoin47.q.out ql/src/test/results/clientpositive/mapjoin47.q.out index af7f20f..5569370 100644 --- ql/src/test/results/clientpositive/mapjoin47.q.out +++ ql/src/test/results/clientpositive/mapjoin47.q.out @@ -1405,7 +1405,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -1511,7 +1511,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/mergejoin.q.out ql/src/test/results/clientpositive/mergejoin.q.out index 664becb..172a64e 100644 --- ql/src/test/results/clientpositive/mergejoin.q.out +++ ql/src/test/results/clientpositive/mergejoin.q.out @@ -1706,7 +1706,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: int) 1 _col0 (type: int) diff --git ql/src/test/results/clientpositive/mergejoins_mixed.q.out ql/src/test/results/clientpositive/mergejoins_mixed.q.out index a5cc23a..b70c909 100644 --- ql/src/test/results/clientpositive/mergejoins_mixed.q.out +++ ql/src/test/results/clientpositive/mergejoins_mixed.q.out @@ -802,7 +802,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col3 (type: string) 1 _col0 (type: string) @@ -841,7 +841,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -1139,7 +1139,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -1437,7 +1437,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/optional_outer.q.out ql/src/test/results/clientpositive/optional_outer.q.out index 9ec1af7..efc952c 100644 --- ql/src/test/results/clientpositive/optional_outer.q.out +++ ql/src/test/results/clientpositive/optional_outer.q.out @@ -283,7 +283,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -344,7 +344,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/perf/spark/query51.q.out ql/src/test/results/clientpositive/perf/spark/query51.q.out index 4a13589..49c8240 100644 --- ql/src/test/results/clientpositive/perf/spark/query51.q.out +++ ql/src/test/results/clientpositive/perf/spark/query51.q.out @@ -263,7 +263,7 @@ 
STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: int), _col1 (type: string) 1 _col0 (type: int), _col1 (type: string) diff --git ql/src/test/results/clientpositive/perf/spark/query97.q.out ql/src/test/results/clientpositive/perf/spark/query97.q.out index 14a2aed..6c5af53 100644 --- ql/src/test/results/clientpositive/perf/spark/query97.q.out +++ ql/src/test/results/clientpositive/perf/spark/query97.q.out @@ -192,7 +192,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: int), _col1 (type: int) 1 _col0 (type: int), _col1 (type: int) diff --git ql/src/test/results/clientpositive/skewjoin_mapjoin2.q.out ql/src/test/results/clientpositive/skewjoin_mapjoin2.q.out index 214f635..a6ad430 100644 --- ql/src/test/results/clientpositive/skewjoin_mapjoin2.q.out +++ ql/src/test/results/clientpositive/skewjoin_mapjoin2.q.out @@ -222,7 +222,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -297,7 +297,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 330 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git ql/src/test/results/clientpositive/skewjoinopt3.q.out ql/src/test/results/clientpositive/skewjoinopt3.q.out index 0730cb1..50b25f2 100644 --- ql/src/test/results/clientpositive/skewjoinopt3.q.out +++ ql/src/test/results/clientpositive/skewjoinopt3.q.out @@ -245,7 +245,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -320,7 +320,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 outputColumnNames: _col0, _col1, _col2, _col3 Statistics: Num rows: 1 Data size: 330 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git ql/src/test/results/clientpositive/smb_mapjoin_1.q.out ql/src/test/results/clientpositive/smb_mapjoin_1.q.out index 1182e56..a8b3f9d 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_1.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_1.q.out @@ -270,7 +270,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 2060 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) @@ -491,7 +491,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 2080 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) diff --git ql/src/test/results/clientpositive/smb_mapjoin_2.q.out ql/src/test/results/clientpositive/smb_mapjoin_2.q.out index 1dfacda..5d4468b 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_2.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_2.q.out @@ -231,7 +231,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 2220 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) @@ -456,7 +456,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 2080 Basic stats: 
COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) diff --git ql/src/test/results/clientpositive/smb_mapjoin_3.q.out ql/src/test/results/clientpositive/smb_mapjoin_3.q.out index cf4c744..1a0a0cf 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_3.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_3.q.out @@ -230,7 +230,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 2220 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) @@ -453,7 +453,7 @@ STAGE PLANS: Statistics: Num rows: 1 Data size: 2060 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) diff --git ql/src/test/results/clientpositive/smb_mapjoin_46.q.out ql/src/test/results/clientpositive/smb_mapjoin_46.q.out index 1302360..302f00b 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_46.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_46.q.out @@ -1316,7 +1316,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1415,7 +1415,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1512,7 +1512,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 1 @@ -1611,7 +1611,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col1 (type: int) 1 _col1 (type: int) diff --git ql/src/test/results/clientpositive/smb_mapjoin_47.q.out ql/src/test/results/clientpositive/smb_mapjoin_47.q.out index c7334a8..825cd0e 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_47.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_47.q.out @@ -1330,7 +1330,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col1 (type: int) 1 _col1 (type: int) @@ -1436,7 +1436,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col1 (type: int) 1 _col1 (type: int) diff --git ql/src/test/results/clientpositive/smb_mapjoin_7.q.out ql/src/test/results/clientpositive/smb_mapjoin_7.q.out index 83033b0..ef5cca6 100644 --- ql/src/test/results/clientpositive/smb_mapjoin_7.q.out +++ ql/src/test/results/clientpositive/smb_mapjoin_7.q.out @@ -633,7 +633,7 @@ STAGE PLANS: Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE Sorted Merge Bucket Map Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 key (type: int) 1 key (type: int) diff --git ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out index 118a48e..217a72d 100644 --- ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out +++ ql/src/test/results/clientpositive/spark/annotate_stats_join.q.out @@ -968,7 +968,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string), _col1 (type: int) 1 _col1 (type: string), _col0 (type: int) diff --git 
ql/src/test/results/clientpositive/spark/auto_join18.q.out ql/src/test/results/clientpositive/spark/auto_join18.q.out index 1b6cc08..5cb783a 100644 --- ql/src/test/results/clientpositive/spark/auto_join18.q.out +++ ql/src/test/results/clientpositive/spark/auto_join18.q.out @@ -96,7 +96,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/spark/auto_join18_multi_distinct.q.out ql/src/test/results/clientpositive/spark/auto_join18_multi_distinct.q.out index 17f0d88..e14f1a6 100644 --- ql/src/test/results/clientpositive/spark/auto_join18_multi_distinct.q.out +++ ql/src/test/results/clientpositive/spark/auto_join18_multi_distinct.q.out @@ -99,7 +99,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/spark/auto_join6.q.out ql/src/test/results/clientpositive/spark/auto_join6.q.out index 67bdc58..899f21a 100644 --- ql/src/test/results/clientpositive/spark/auto_join6.q.out +++ ql/src/test/results/clientpositive/spark/auto_join6.q.out @@ -88,7 +88,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/spark/auto_join7.q.out ql/src/test/results/clientpositive/spark/auto_join7.q.out index a410966..dde9608 100644 --- ql/src/test/results/clientpositive/spark/auto_join7.q.out +++ ql/src/test/results/clientpositive/spark/auto_join7.q.out @@ -116,7 +116,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 Left Outer Join 0 to 2 keys: 0 _col0 (type: string) diff --git ql/src/test/results/clientpositive/spark/auto_join_filters.q.out ql/src/test/results/clientpositive/spark/auto_join_filters.q.out index 8ae5a0e..1e1788d 100644 --- ql/src/test/results/clientpositive/spark/auto_join_filters.q.out +++ ql/src/test/results/clientpositive/spark/auto_join_filters.q.out @@ -54,6 +54,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 4939870 +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4939870 PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 @@ -198,6 +208,42 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 4939870 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = 
b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4939870 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4939870 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4939870 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4939870 PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 @@ -340,6 +386,16 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 4939870 +Warning: Shuffle Join JOIN[6][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT 
sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b on a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4939870 PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 @@ -484,6 +540,42 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 4939870 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4939870 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4939870 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4939870 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value and a.key=b.key AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND b.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4939870 PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value AND a.key > 40 AND a.value > 50 AND a.key = a.value AND b.key > 40 AND b.value > 50 AND 
b.key = b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value AND c.key > 40 AND c.value > 50 AND c.key = c.value AND b.key > 40 AND b.value > 50 AND b.key = b.value) PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 diff --git ql/src/test/results/clientpositive/spark/auto_join_nulls.q.out ql/src/test/results/clientpositive/spark/auto_join_nulls.q.out index 32a885b..c8bd2d8 100644 --- ql/src/test/results/clientpositive/spark/auto_join_nulls.q.out +++ ql/src/test/results/clientpositive/spark/auto_join_nulls.q.out @@ -188,6 +188,42 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@myinput1 #### A masked pattern was here #### 4543526 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4543526 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4543526 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4543526 +PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value and a.key=b.key +PREHOOK: type: QUERY +PREHOOK: Input: default@myinput1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.value = b.value and a.key=b.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@myinput1 +#### A masked pattern was here #### +4543526 PREHOOK: query: SELECT sum(hash(a.key,a.value,b.key,b.value)) from myinput1 a LEFT OUTER JOIN myinput1 b ON (a.value=b.value) RIGHT OUTER JOIN myinput1 c ON (b.value=c.value) PREHOOK: type: QUERY PREHOOK: Input: default@myinput1 diff --git ql/src/test/results/clientpositive/spark/join18.q.out ql/src/test/results/clientpositive/spark/join18.q.out index 1a949b2..cbb0d65 100644 --- ql/src/test/results/clientpositive/spark/join18.q.out +++ ql/src/test/results/clientpositive/spark/join18.q.out @@ -95,7 +95,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/spark/join18_multi_distinct.q.out ql/src/test/results/clientpositive/spark/join18_multi_distinct.q.out index d0ae0ba..4e0b290 100644 --- ql/src/test/results/clientpositive/spark/join18_multi_distinct.q.out +++ ql/src/test/results/clientpositive/spark/join18_multi_distinct.q.out @@ -98,7 +98,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: 
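From here on, most hunks make the same one-line change to the condition map: the generic "Outer Join 0 to 1" label becomes the explicit "Full Outer Join 0 to 1" (the hunk this note sits inside resumes immediately below). Any FULL OUTER query from the expectations above reproduces it; a sketch against the myinput1 test table:

  EXPLAIN
  SELECT sum(hash(a.key, a.value, b.key, b.value))
  FROM myinput1 a FULL OUTER JOIN myinput1 b ON a.key = b.key;
  -- Join Operator / condition map now prints:
  --   Full Outer Join 0 to 1
  -- where these golden files previously recorded "Outer Join 0 to 1".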
- Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/spark/join6.q.out ql/src/test/results/clientpositive/spark/join6.q.out index 3f884ca..eecacea 100644 --- ql/src/test/results/clientpositive/spark/join6.q.out +++ ql/src/test/results/clientpositive/spark/join6.q.out @@ -88,7 +88,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/spark/join7.q.out ql/src/test/results/clientpositive/spark/join7.q.out index d43c5cd..0012b41 100644 --- ql/src/test/results/clientpositive/spark/join7.q.out +++ ql/src/test/results/clientpositive/spark/join7.q.out @@ -116,7 +116,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 Left Outer Join 0 to 2 keys: 0 _col0 (type: string) diff --git ql/src/test/results/clientpositive/spark/join_filters_overlap.q.out ql/src/test/results/clientpositive/spark/join_filters_overlap.q.out index 9b8b69c..2410c98 100644 --- ql/src/test/results/clientpositive/spark/join_filters_overlap.q.out +++ ql/src/test/results/clientpositive/spark/join_filters_overlap.q.out @@ -1227,7 +1227,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 Left Outer Join 1 to 2 Left Outer Join 0 to 3 filter mappings: diff --git ql/src/test/results/clientpositive/spark/mergejoins_mixed.q.out ql/src/test/results/clientpositive/spark/mergejoins_mixed.q.out index c53cd00..681ed0d 100644 --- ql/src/test/results/clientpositive/spark/mergejoins_mixed.q.out +++ ql/src/test/results/clientpositive/spark/mergejoins_mixed.q.out @@ -849,7 +849,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col3 (type: string) 1 _col0 (type: string) @@ -865,7 +865,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -1147,7 +1147,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) @@ -1429,7 +1429,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Outer Join 0 to 1 + Full Outer Join 0 to 1 keys: 0 _col0 (type: string) 1 _col0 (type: string) diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out index f0318a3..3e4a325 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_0.q.out @@ -62,10 +62,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3] + valueColumns: 0:tinyint, 1:tinyint, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: tinyint), _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint) Execution mode: 
vectorized @@ -117,10 +116,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:tinyint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3] + valueColumns: 1:tinyint, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 @@ -241,10 +240,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -296,10 +294,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized @@ -562,10 +559,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3] + valueColumns: 0:bigint, 1:bigint, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized @@ -617,10 +613,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3] + valueColumns: 1:bigint, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 @@ -741,10 +737,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No 
DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -796,10 +791,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized @@ -1062,10 +1056,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3] + valueColumns: 0:float, 1:float, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: float), _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized @@ -1117,10 +1110,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:float native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3] + valueColumns: 1:float, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 @@ -1241,10 +1234,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:double Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: double) Execution mode: vectorized @@ -1296,10 +1288,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized @@ -1607,10 +1598,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] 
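The parquet_vectorization_* hunks here and below track a pure output-format change in the vectorization annotations: bare ordinal lists (keyColumnNums: [0] / valueColumnNums: [1, 2, 3]) give way to typed keyColumns / valueColumns entries of the form ordinal:type, and empty lists are dropped outright (the hunk this note interrupts picks up again with the unchanged native: true line). A sketch of a query that surfaces these annotations, assuming the alltypesparquet test table and Hive's EXPLAIN VECTORIZATION DETAIL syntax — the exact queries live in the corresponding .q scripts:

  SET hive.vectorized.execution.enabled=true;
  EXPLAIN VECTORIZATION DETAIL
  SELECT ctinyint, MAX(cbigint)
  FROM alltypesparquet
  GROUP BY ctinyint;
  -- Reduce Sink Vectorization now reads, e.g.:
  --   keyColumns: 0:tinyint
  --   valueColumns: 1:bigint
  -- where the old expectations recorded keyColumnNums: [0] / valueColumnNums: [1].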
native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4, 5] + valueColumns: 0:struct, 1:struct, 2:struct, 3:bigint, 4:double, 5:tinyint Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: bigint), _col4 (type: double), _col5 (type: tinyint) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out index 18379f2..173835f 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_1.q.out @@ -94,10 +94,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4, 5] + valueColumns: 0:struct, 1:double, 2:tinyint, 3:int, 4:struct, 5:bigint Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: double), _col2 (type: tinyint), _col3 (type: int), _col4 (type: struct), _col5 (type: bigint) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_12.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_12.q.out index df85ce3..aa43219 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_12.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_12.q.out @@ -121,10 +121,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: double), _col1 (type: bigint), _col2 (type: string), _col3 (type: boolean) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2, 3] + keyColumns: 0:double, 1:bigint, 2:string, 3:boolean native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [4, 5, 6, 7, 8] + valueColumns: 4:bigint, 5:struct, 6:struct, 7:bigint, 8:struct Statistics: Num rows: 3754 Data size: 45048 Basic stats: COMPLETE Column stats: NONE value expressions: _col4 (type: bigint), _col5 (type: struct), _col6 (type: struct), _col7 (type: bigint), _col8 (type: struct) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_13.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_13.q.out index e4db32c..24dc741 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_13.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_13.q.out @@ -123,10 +123,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) Reduce Sink Vectorization: className: 
VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2, 3, 4] + keyColumns: 0:boolean, 1:tinyint, 2:timestamp, 3:float, 4:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [5, 6, 7, 8, 9, 10] + valueColumns: 5:tinyint, 6:double, 7:struct, 8:struct, 9:float, 10:tinyint Statistics: Num rows: 2730 Data size: 32760 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: struct), _col8 (type: struct), _col9 (type: float), _col10 (type: tinyint) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_14.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_14.q.out index aef374a..0b73789 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_14.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_14.q.out @@ -124,10 +124,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2, 3, 4] + keyColumns: 0:string, 1:float, 2:double, 3:timestamp, 4:boolean native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [5, 6, 7, 8, 9, 10] + valueColumns: 5:struct, 6:float, 7:struct, 8:bigint, 9:struct, 10:struct Statistics: Num rows: 606 Data size: 7272 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: struct), _col6 (type: float), _col7 (type: struct), _col8 (type: bigint), _col9 (type: struct), _col10 (type: struct) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_15.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_15.q.out index 24cdf06..df258c3 100644 --- ql/src/test/results/clientpositive/spark/parquet_vectorization_15.q.out +++ ql/src/test/results/clientpositive/spark/parquet_vectorization_15.q.out @@ -119,10 +119,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2, 3, 4, 5, 6] + keyColumns: 0:float, 1:boolean, 2:double, 3:string, 4:tinyint, 5:int, 6:timestamp native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [7, 8, 9, 10, 11, 12] + valueColumns: 7:struct, 8:double, 9:struct, 10:struct, 11:struct, 12:struct Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE value expressions: _col7 (type: struct), _col8 (type: double), _col9 (type: struct), _col10 (type: struct), _col11 (type: struct), _col12 (type: struct) Execution mode: vectorized diff --git 
ql/src/test/results/clientpositive/spark/parquet_vectorization_16.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_16.q.out
index a35c9c5..f813705 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_16.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_16.q.out
@@ -96,10 +96,10 @@ STAGE PLANS:
      Map-reduce partition columns: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp)
      Reduce Sink Vectorization:
          className: VectorReduceSinkMultiKeyOperator
-         keyColumnNums: [0, 1, 2]
+         keyColumns: 0:double, 1:string, 2:timestamp
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: [3, 4, 5]
+         valueColumns: 3:bigint, 4:struct, 5:double
      Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE
      value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: double)
    Execution mode: vectorized
diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_17.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_17.q.out
index 9e9f4df..162e634 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_17.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_17.q.out
@@ -90,10 +90,10 @@ STAGE PLANS:
      sort order: ++
      Reduce Sink Vectorization:
          className: VectorReduceSinkObjectHashOperator
-         keyColumnNums: [3, 4]
+         keyColumns: 3:bigint, 4:float
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: [6, 2, 8, 5, 15, 16, 14, 17, 19, 20, 22, 18]
+         valueColumns: 6:string, 2:int, 8:timestamp, 5:double, 15:double, 16:bigint, 14:double, 17:double, 19:double, 20:double, 22:decimal(11,4), 18:double
      Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE
      value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: timestamp), _col4 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: decimal(11,4)), _col13 (type: double)
    Execution mode: vectorized
diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_2.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_2.q.out
index 8b3c5f2..88e18ed 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_2.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_2.q.out
@@ -98,10 +98,9 @@ STAGE PLANS:
      sort order: 
      Reduce Sink Vectorization:
          className: VectorReduceSinkEmptyKeyOperator
-         keyColumnNums: []
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: [0, 1, 2, 3, 4, 5]
+         valueColumns: 0:struct, 1:double, 2:struct, 3:bigint, 4:tinyint, 5:struct
      Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE
      value expressions: _col0 (type: struct), _col1 (type: double), _col2 (type: struct), _col3 (type: bigint), _col4 (type: tinyint), _col5 (type: struct)
    Execution mode: vectorized
diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_3.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_3.q.out
index dd3532b..9739650 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_3.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_3.q.out
@@ -103,10 +103,9 @@ STAGE PLANS:
      sort order: 
      Reduce Sink Vectorization:
          className: VectorReduceSinkEmptyKeyOperator
-         keyColumnNums: []
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: [0, 1, 2, 3, 4, 5]
+         valueColumns: 0:struct, 1:struct, 2:struct, 3:double, 4:struct, 5:struct
      Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE Column stats: NONE
      value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: double), _col4 (type: struct), _col5 (type: struct)
    Execution mode: vectorized
diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_4.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_4.q.out
index 4a7b0e0..6853f98 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_4.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_4.q.out
@@ -98,10 +98,9 @@ STAGE PLANS:
      sort order: 
      Reduce Sink Vectorization:
          className: VectorReduceSinkEmptyKeyOperator
-         keyColumnNums: []
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: [0, 1, 2, 3, 4]
+         valueColumns: 0:bigint, 1:struct, 2:struct, 3:struct, 4:tinyint
      Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE Column stats: NONE
      value expressions: _col0 (type: bigint), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: tinyint)
    Execution mode: vectorized
diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_5.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_5.q.out
index 56c62c3..86bbaea 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_5.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_5.q.out
@@ -92,10 +92,9 @@ STAGE PLANS:
      sort order: 
      Reduce Sink Vectorization:
          className: VectorReduceSinkEmptyKeyOperator
-         keyColumnNums: []
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: [0, 1, 2, 3, 4]
+         valueColumns: 0:smallint, 1:bigint, 2:smallint, 3:bigint, 4:tinyint
      Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE
      value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: tinyint)
    Execution mode: vectorized
diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_7.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_7.q.out
index 52b8126..3ba9b61 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_7.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_7.q.out
@@ -96,10 +96,9 @@ STAGE PLANS:
      sort order: +++++++++++++++
      Reduce Sink Vectorization:
          className: VectorReduceSinkObjectHashOperator
-         keyColumnNums: [10, 3, 1, 0, 8, 6, 14, 15, 16, 17, 19, 20, 18, 21, 23]
+         keyColumns: 10:boolean, 3:bigint, 1:smallint, 0:tinyint, 8:timestamp, 6:string, 14:bigint, 15:int, 16:smallint, 17:tinyint, 19:int, 20:bigint, 18:int, 21:tinyint, 23:tinyint
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 5461 Data size: 65532 Basic stats: COMPLETE Column stats: NONE
      TopN Hash Memory Usage: 0.1
    Execution mode: vectorized
diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_8.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_8.q.out
index f76df32..99aa65c 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_8.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_8.q.out
@@ -92,10 +92,9 @@ STAGE PLANS:
      sort order: ++++++++++++++
      Reduce Sink Vectorization:
          className: VectorReduceSinkObjectHashOperator
-         keyColumnNums: [8, 5, 10, 6, 4, 13, 14, 15, 17, 19, 16, 18, 20, 22]
+         keyColumns: 8:timestamp, 5:double, 10:boolean, 6:string, 4:float, 13:double, 14:double, 15:double, 17:float, 19:double, 16:double, 18:float, 20:float, 22:double
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
      TopN Hash Memory Usage: 0.1
    Execution mode: vectorized
diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_9.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_9.q.out
index a35c9c5..f813705 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_9.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_9.q.out
@@ -96,10 +96,10 @@ STAGE PLANS:
      Map-reduce partition columns: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp)
      Reduce Sink Vectorization:
          className: VectorReduceSinkMultiKeyOperator
-         keyColumnNums: [0, 1, 2]
+         keyColumns: 0:double, 1:string, 2:timestamp
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: [3, 4, 5]
+         valueColumns: 3:bigint, 4:struct, 5:double
      Statistics: Num rows: 4096 Data size: 49152 Basic stats: COMPLETE Column stats: NONE
      value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: double)
    Execution mode: vectorized
diff --git ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out
index ce188a0..ae52bc5 100644
--- ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out
+++ ql/src/test/results/clientpositive/spark/parquet_vectorization_limit.q.out
@@ -120,10 +120,10 @@ STAGE PLANS:
      sort order: ++
      Reduce Sink Vectorization:
          className: VectorReduceSinkObjectHashOperator
-         keyColumnNums: [0, 5]
+         keyColumns: 0:tinyint, 5:double
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: [1]
+         valueColumns: 1:smallint
      Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
      TopN Hash Memory Usage: 0.3
      value expressions: _col2 (type: smallint)
@@ -276,11 +276,11 @@ STAGE PLANS:
      Map-reduce partition columns: _col0 (type: tinyint)
      Reduce Sink Vectorization:
          className: VectorReduceSinkObjectHashOperator
-         keyColumnNums: [0]
+         keyColumns: 0:tinyint
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         partitionColumnNums: [0]
-         valueColumnNums: [1]
+         partitionColumns: 0:tinyint
+         valueColumns: 1:struct
      Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
      TopN Hash Memory Usage: 0.3
      value expressions: _col1 (type: struct)
@@ -436,10 +436,9 @@ STAGE PLANS:
      Map-reduce partition columns: _col0 (type: tinyint)
      Reduce Sink Vectorization:
          className: VectorReduceSinkLongOperator
-         keyColumnNums: [0]
+         keyColumns: 0:tinyint
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
      TopN Hash Memory Usage: 0.3
    Execution mode: vectorized
@@ -592,11 +591,10 @@ STAGE PLANS:
      Map-reduce partition columns: _col0 (type: tinyint)
      Reduce Sink Vectorization:
          className: VectorReduceSinkObjectHashOperator
-         keyColumnNums: [0, 1]
+         keyColumns: 0:tinyint, 1:double
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         partitionColumnNums: [0]
-         valueColumnNums: []
+         partitionColumns: 0:tinyint
      Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -792,10 +790,10 @@ STAGE PLANS:
      Map-reduce partition columns: _col0 (type: double)
      Reduce Sink Vectorization:
          className: VectorReduceSinkMultiKeyOperator
-         keyColumnNums: [0]
+         keyColumns: 0:double
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: [1]
+         valueColumns: 1:bigint
      Statistics: Num rows: 12288 Data size: 147456 Basic stats: COMPLETE Column stats: NONE
      value expressions: _col1 (type: bigint)
    Execution mode: vectorized
@@ -849,10 +847,9 @@ STAGE PLANS:
      sort order: ++
      Reduce Sink Vectorization:
          className: VectorReduceSinkObjectHashOperator
-         keyColumnNums: [1, 0]
+         keyColumns: 1:bigint, 0:double
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 6144 Data size: 73728 Basic stats: COMPLETE Column stats: NONE
      TopN Hash Memory Usage: 0.3
    Reducer 3
diff --git ql/src/test/results/clientpositive/spark/semijoin.q.out ql/src/test/results/clientpositive/spark/semijoin.q.out
index 6932efa..34ae2bf 100644
--- ql/src/test/results/clientpositive/spark/semijoin.q.out
+++ ql/src/test/results/clientpositive/spark/semijoin.q.out
@@ -1857,7 +1857,7 @@ STAGE PLANS:
    Reduce Operator Tree:
      Join Operator
        condition map:
-            Outer Join 0 to 1
+            Full Outer Join 0 to 1
             Left Semi Join 1 to 2
        keys:
          0 key (type: int)
@@ -2256,7 +2256,7 @@ STAGE PLANS:
      Join Operator
        condition map:
             Left Semi Join 0 to 1
-            Outer Join 0 to 2
+            Full Outer Join 0 to 2
        keys:
          0 key (type: int)
          1 _col0 (type: int)
diff --git ql/src/test/results/clientpositive/spark/skewjoinopt3.q.out ql/src/test/results/clientpositive/spark/skewjoinopt3.q.out
index 3cd65b9..31ccddc 100644
--- ql/src/test/results/clientpositive/spark/skewjoinopt3.q.out
+++ ql/src/test/results/clientpositive/spark/skewjoinopt3.q.out
@@ -271,7 +271,7 @@ STAGE PLANS:
    Reduce Operator Tree:
      Join Operator
        condition map:
-            Outer Join 0 to 1
+            Full Outer Join 0 to 1
        outputColumnNames: _col0, _col1, _col2, _col3
        Statistics: Num rows: 1 Data size: 330 Basic stats: COMPLETE Column stats: NONE
        File Output Operator
@@ -285,7 +285,7 @@ STAGE PLANS:
    Reduce Operator Tree:
      Join Operator
        condition map:
-            Outer Join 0 to 1
+            Full Outer Join 0 to 1
        outputColumnNames: _col0, _col1, _col2, _col3
        Statistics: Num rows: 1 Data size: 330 Basic stats: COMPLETE Column stats: NONE
        File Output Operator
diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_1.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_1.q.out
index ec10c44..9575dd9 100644
--- ql/src/test/results/clientpositive/spark/smb_mapjoin_1.q.out
+++ ql/src/test/results/clientpositive/spark/smb_mapjoin_1.q.out
@@ -291,7 +291,7 @@ STAGE PLANS:
        Statistics: Num rows: 1 Data size: 2060 Basic stats: COMPLETE Column stats: NONE
        Sorted Merge Bucket Map Join Operator
          condition map:
-              Outer Join 0 to 1
+              Full Outer Join 0 to 1
          keys:
            0 key (type: int)
            1 key (type: int)
@@ -536,7 +536,7 @@ STAGE PLANS:
        Statistics: Num rows: 1 Data size: 2080 Basic stats: COMPLETE Column stats: NONE
        Sorted Merge Bucket Map Join Operator
          condition map:
-              Outer Join 0 to 1
+              Full Outer Join 0 to 1
          keys:
            0 key (type: int)
            1 key (type: int)
diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_2.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_2.q.out
index 0bcd167..a591f76 100644
--- ql/src/test/results/clientpositive/spark/smb_mapjoin_2.q.out
+++ ql/src/test/results/clientpositive/spark/smb_mapjoin_2.q.out
@@ -252,7 +252,7 @@ STAGE PLANS:
        Statistics: Num rows: 1 Data size: 2220 Basic stats: COMPLETE Column stats: NONE
        Sorted Merge Bucket Map Join Operator
          condition map:
-              Outer Join 0 to 1
+              Full Outer Join 0 to 1
          keys:
            0 key (type: int)
            1 key (type: int)
@@ -501,7 +501,7 @@ STAGE PLANS:
        Statistics: Num rows: 1 Data size: 2080 Basic stats: COMPLETE Column stats: NONE
        Sorted Merge Bucket Map Join Operator
          condition map:
-              Outer Join 0 to 1
+              Full Outer Join 0 to 1
          keys:
            0 key (type: int)
            1 key (type: int)
diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_3.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_3.q.out
index ad879a8..0b4a94f 100644
--- ql/src/test/results/clientpositive/spark/smb_mapjoin_3.q.out
+++ ql/src/test/results/clientpositive/spark/smb_mapjoin_3.q.out
@@ -251,7 +251,7 @@ STAGE PLANS:
        Statistics: Num rows: 1 Data size: 2220 Basic stats: COMPLETE Column stats: NONE
        Sorted Merge Bucket Map Join Operator
          condition map:
-              Outer Join 0 to 1
+              Full Outer Join 0 to 1
          keys:
            0 key (type: int)
            1 key (type: int)
@@ -498,7 +498,7 @@ STAGE PLANS:
        Statistics: Num rows: 1 Data size: 2060 Basic stats: COMPLETE Column stats: NONE
        Sorted Merge Bucket Map Join Operator
          condition map:
-              Outer Join 0 to 1
+              Full Outer Join 0 to 1
          keys:
            0 key (type: int)
            1 key (type: int)
diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_4.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_4.q.out
index 21171db..d2a67f9 100644
--- ql/src/test/results/clientpositive/spark/smb_mapjoin_4.q.out
+++ ql/src/test/results/clientpositive/spark/smb_mapjoin_4.q.out
@@ -542,7 +542,7 @@ STAGE PLANS:
      Join Operator
        condition map:
             Left Outer Join 0 to 1
-            Outer Join 1 to 2
+            Full Outer Join 1 to 2
        keys:
          0 _col0 (type: int)
          1 _col0 (type: int)
@@ -974,7 +974,7 @@ STAGE PLANS:
      Join Operator
        condition map:
             Right Outer Join 0 to 1
-            Outer Join 1 to 2
+            Full Outer Join 1 to 2
        keys:
          0 _col0 (type: int)
          1 _col0 (type: int)
@@ -1192,7 +1192,7 @@ STAGE PLANS:
    Reduce Operator Tree:
      Join Operator
        condition map:
-            Outer Join 0 to 1
+            Full Outer Join 0 to 1
             Left Outer Join 1 to 2
        keys:
          0 _col0 (type: int)
@@ -1301,7 +1301,7 @@ STAGE PLANS:
    Reduce Operator Tree:
      Join Operator
        condition map:
-            Outer Join 0 to 1
+            Full Outer Join 0 to 1
             Right Outer Join 1 to 2
        keys:
          0 _col0 (type: int)
@@ -1407,8 +1407,8 @@ STAGE PLANS:
    Reduce Operator Tree:
      Join Operator
        condition map:
-            Outer Join 0 to 1
-            Outer Join 1 to 2
+            Full Outer Join 0 to 1
+            Full Outer Join 1 to 2
        keys:
          0 _col0 (type: int)
          1 _col0 (type: int)
diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_5.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_5.q.out
index 348d165..18dff0e 100644
--- ql/src/test/results/clientpositive/spark/smb_mapjoin_5.q.out
+++ ql/src/test/results/clientpositive/spark/smb_mapjoin_5.q.out
@@ -542,7 +542,7 @@ STAGE PLANS:
      Join Operator
        condition map:
             Left Outer Join 0 to 1
-            Outer Join 1 to 2
+            Full Outer Join 1 to 2
        keys:
          0 _col0 (type: int)
          1 _col0 (type: int)
@@ -974,7 +974,7 @@ STAGE PLANS:
      Join Operator
        condition map:
             Right Outer Join 0 to 1
-            Outer Join 1 to 2
+            Full Outer Join 1 to 2
        keys:
          0 _col0 (type: int)
          1 _col0 (type: int)
@@ -1192,7 +1192,7 @@ STAGE PLANS:
    Reduce Operator Tree:
      Join Operator
        condition map:
-            Outer Join 0 to 1
+            Full Outer Join 0 to 1
             Left Outer Join 1 to 2
        keys:
          0 _col0 (type: int)
@@ -1301,7 +1301,7 @@ STAGE PLANS:
    Reduce Operator Tree:
      Join Operator
        condition map:
-            Outer Join 0 to 1
+            Full Outer Join 0 to 1
             Right Outer Join 1 to 2
        keys:
          0 _col0 (type: int)
@@ -1407,8 +1407,8 @@ STAGE PLANS:
    Reduce Operator Tree:
      Join Operator
        condition map:
-            Outer Join 0 to 1
-            Outer Join 1 to 2
+            Full Outer Join 0 to 1
+            Full Outer Join 1 to 2
        keys:
          0 _col0 (type: int)
          1 _col0 (type: int)
diff --git ql/src/test/results/clientpositive/spark/smb_mapjoin_7.q.out ql/src/test/results/clientpositive/spark/smb_mapjoin_7.q.out
index 610abab..1732927 100644
--- ql/src/test/results/clientpositive/spark/smb_mapjoin_7.q.out
+++ ql/src/test/results/clientpositive/spark/smb_mapjoin_7.q.out
@@ -631,7 +631,7 @@ STAGE PLANS:
        Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE
        Sorted Merge Bucket Map Join Operator
          condition map:
-              Outer Join 0 to 1
+              Full Outer Join 0 to 1
          keys:
            0 key (type: int)
            1 key (type: int)
diff --git ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out
index 1916d25..cff06ff 100644
--- ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out
+++ ql/src/test/results/clientpositive/spark/spark_vectorized_dynamic_partition_pruning.q.out
@@ -89,10 +89,9 @@ STAGE PLANS:
      Map-reduce partition columns: _col0 (type: string)
      Reduce Sink Vectorization:
          className: VectorReduceSinkStringOperator
-         keyColumnNums: [0]
+         keyColumns: 0:string
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -372,10 +371,9 @@ STAGE PLANS:
      Map-reduce partition columns: _col0 (type: string)
      Reduce Sink Vectorization:
          className: VectorReduceSinkStringOperator
-         keyColumnNums: [2]
+         keyColumns: 2:string
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -424,10 +422,9 @@ STAGE PLANS:
      Map-reduce partition columns: _col0 (type: string)
      Reduce Sink Vectorization:
          className: VectorReduceSinkStringOperator
-         keyColumnNums: [0]
+         keyColumns: 0:string
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -575,10 +572,9 @@ STAGE PLANS:
      Map-reduce partition columns: _col0 (type: string)
      Reduce Sink Vectorization:
          className: VectorReduceSinkStringOperator
-         keyColumnNums: [2]
+         keyColumns: 2:string
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -627,10 +623,9 @@ STAGE PLANS:
      Map-reduce partition columns: _col0 (type: string)
      Reduce Sink Vectorization:
          className: VectorReduceSinkStringOperator
-         keyColumnNums: [0]
+         keyColumns: 0:string
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -857,11 +852,10 @@ STAGE PLANS:
      Map-reduce partition columns: day(_col0) (type: int)
      Reduce Sink Vectorization:
          className: VectorReduceSinkLongOperator
-         keyColumnNums: [5]
+         keyColumns: 5:int
          keyExpressions: VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 5:int
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -910,11 +904,10 @@ STAGE PLANS:
      Map-reduce partition columns: day(_col0) (type: int)
      Reduce Sink Vectorization:
          className: VectorReduceSinkLongOperator
-         keyColumnNums: [3]
+         keyColumns: 3:int
          keyExpressions: VectorUDFDayOfMonthString(col 0:string, fieldStart 8, fieldLength 2) -> 3:int
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -1062,11 +1055,10 @@ STAGE PLANS:
      Map-reduce partition columns: day(_col0) (type: int)
      Reduce Sink Vectorization:
          className: VectorReduceSinkLongOperator
-         keyColumnNums: [5]
+         keyColumns: 5:int
          keyExpressions: VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 5:int
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -1115,11 +1107,10 @@ STAGE PLANS:
      Map-reduce partition columns: day(_col0) (type: int)
      Reduce Sink Vectorization:
          className: VectorReduceSinkLongOperator
-         keyColumnNums: [3]
+         keyColumns: 3:int
          keyExpressions: VectorUDFDayOfMonthString(col 0:string, fieldStart 8, fieldLength 2) -> 3:int
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -1344,11 +1335,10 @@ STAGE PLANS:
      Map-reduce partition columns: abs(((- UDFToLong(concat(UDFToString(day(_col0)), '0'))) + 10)) (type: bigint)
      Reduce Sink Vectorization:
          className: VectorReduceSinkLongOperator
-         keyColumnNums: [8]
+         keyColumns: 8:bigint
          keyExpressions: FuncAbsLongToLong(col 5:bigint)(children: LongColAddLongScalar(col 8:bigint, val 10)(children: LongColUnaryMinus(col 5:bigint)(children: CastStringToLong(col 7:string)(children: StringGroupColConcatStringScalar(col 6:string, val 0)(children: CastLongToString(col 5:int)(children: VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 5:int) -> 6:string) -> 7:string) -> 5:bigint) -> 8:bigint) -> 5:bigint) -> 8:bigint
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -1397,11 +1387,10 @@ STAGE PLANS:
      Map-reduce partition columns: abs(((- UDFToLong(concat(UDFToString(day(_col0)), '0'))) + 10)) (type: bigint)
      Reduce Sink Vectorization:
          className: VectorReduceSinkLongOperator
-         keyColumnNums: [6]
+         keyColumns: 6:bigint
          keyExpressions: FuncAbsLongToLong(col 3:bigint)(children: LongColAddLongScalar(col 6:bigint, val 10)(children: LongColUnaryMinus(col 3:bigint)(children: CastStringToLong(col 5:string)(children: StringGroupColConcatStringScalar(col 4:string, val 0)(children: CastLongToString(col 3:int)(children: VectorUDFDayOfMonthString(col 0:string, fieldStart 8, fieldLength 2) -> 3:int) -> 4:string) -> 5:string) -> 3:bigint) -> 6:bigint) -> 3:bigint) -> 6:bigint
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -1619,11 +1608,10 @@ STAGE PLANS:
      Map-reduce partition columns: CAST( UDFToShort(day(_col0)) AS decimal(10,0)) (type: decimal(10,0))
      Reduce Sink Vectorization:
          className: VectorReduceSinkMultiKeyOperator
-         keyColumnNums: [6]
+         keyColumns: 6:decimal(10,0)
          keyExpressions: CastLongToDecimal(col 5:smallint)(children: col 5:int) -> 6:decimal(10,0)
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -1672,11 +1660,10 @@ STAGE PLANS:
      Map-reduce partition columns: CAST( day(_col0) AS decimal(10,0)) (type: decimal(10,0))
      Reduce Sink Vectorization:
          className: VectorReduceSinkMultiKeyOperator
-         keyColumnNums: [4]
+         keyColumns: 4:decimal(10,0)
          keyExpressions: CastLongToDecimal(col 3:int)(children: VectorUDFDayOfMonthString(col 0:string, fieldStart 8, fieldLength 2) -> 3:int) -> 4:decimal(10,0)
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -1958,10 +1945,10 @@ STAGE PLANS:
      Map-reduce partition columns: _col0 (type: string)
      Reduce Sink Vectorization:
          className: VectorReduceSinkStringOperator
-         keyColumnNums: [2]
+         keyColumns: 2:string
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: [3]
+         valueColumns: 3:string
      Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
      value expressions: _col1 (type: string)
    Execution mode: vectorized
@@ -2011,10 +1998,9 @@ STAGE PLANS:
      Map-reduce partition columns: _col0 (type: string)
      Reduce Sink Vectorization:
          className: VectorReduceSinkStringOperator
-         keyColumnNums: [0]
+         keyColumns: 0:string
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -2062,10 +2048,9 @@ STAGE PLANS:
      Map-reduce partition columns: _col0 (type: string)
      Reduce Sink Vectorization:
          className: VectorReduceSinkStringOperator
-         keyColumnNums: [0]
+         keyColumns: 0:string
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -2240,10 +2225,10 @@ STAGE PLANS:
      Map-reduce partition columns: _col0 (type: string)
      Reduce Sink Vectorization:
          className: VectorReduceSinkStringOperator
-         keyColumnNums: [2]
+         keyColumns: 2:string
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: [3]
+         valueColumns: 3:string
      Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
      value expressions: _col1 (type: string)
    Execution mode: vectorized
@@ -2293,10 +2278,9 @@ STAGE PLANS:
      Map-reduce partition columns: _col0 (type: string)
      Reduce Sink Vectorization:
          className: VectorReduceSinkStringOperator
-         keyColumnNums: [0]
+         keyColumns: 0:string
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -2344,10 +2328,9 @@ STAGE PLANS:
      Map-reduce partition columns: _col0 (type: string)
      Reduce Sink Vectorization:
          className: VectorReduceSinkStringOperator
-         keyColumnNums: [0]
+         keyColumns: 0:string
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -2619,10 +2602,9 @@ STAGE PLANS:
      Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
      Reduce Sink Vectorization:
          className: VectorReduceSinkMultiKeyOperator
-         keyColumnNums: [2, 3]
+         keyColumns: 2:string, 3:string
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -2671,10 +2653,9 @@ STAGE PLANS:
      Map-reduce partition columns: _col0 (type: string), _col2 (type: string)
      Reduce Sink Vectorization:
          className: VectorReduceSinkMultiKeyOperator
-         keyColumnNums: [0, 2]
+         keyColumns: 0:string, 2:string
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -2822,10 +2803,9 @@ STAGE PLANS:
      Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
      Reduce Sink Vectorization:
          className: VectorReduceSinkMultiKeyOperator
-         keyColumnNums: [2, 3]
+         keyColumns: 2:string, 3:string
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -2874,10 +2854,9 @@ STAGE PLANS:
      Map-reduce partition columns: _col0 (type: string), _col2 (type: string)
      Reduce Sink Vectorization:
          className: VectorReduceSinkMultiKeyOperator
-         keyColumnNums: [0, 2]
+         keyColumns: 0:string, 2:string
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -3103,10 +3082,9 @@ STAGE PLANS:
      Map-reduce partition columns: _col0 (type: string)
      Reduce Sink Vectorization:
          className: VectorReduceSinkStringOperator
-         keyColumnNums: [2]
+         keyColumns: 2:string
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -3155,10 +3133,9 @@ STAGE PLANS:
      Map-reduce partition columns: _col0 (type: string)
      Reduce Sink Vectorization:
          className: VectorReduceSinkStringOperator
-         keyColumnNums: [0]
+         keyColumns: 0:string
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -3306,10 +3283,9 @@ STAGE PLANS:
      Map-reduce partition columns: _col0 (type: string)
      Reduce Sink Vectorization:
          className: VectorReduceSinkStringOperator
-         keyColumnNums: [2]
+         keyColumns: 2:string
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -3358,10 +3334,9 @@ STAGE PLANS:
      Map-reduce partition columns: _col0 (type: string)
      Reduce Sink Vectorization:
          className: VectorReduceSinkStringOperator
-         keyColumnNums: [0]
+         keyColumns: 0:string
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -3588,11 +3563,10 @@ STAGE PLANS:
      Map-reduce partition columns: UDFToDouble(_col0) (type: double)
      Reduce Sink Vectorization:
          className: VectorReduceSinkMultiKeyOperator
-         keyColumnNums: [5]
+         keyColumns: 5:double
          keyExpressions: CastStringToDouble(col 3:string) -> 5:double
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -3641,11 +3615,10 @@ STAGE PLANS:
      Map-reduce partition columns: UDFToDouble(UDFToInteger((_col0 / 2.0D))) (type: double)
      Reduce Sink Vectorization:
          className: VectorReduceSinkMultiKeyOperator
-         keyColumnNums: [3]
+         keyColumns: 3:double
          keyExpressions: CastLongToDouble(col 4:int)(children: CastDoubleToLong(col 3:double)(children: DoubleColDivideDoubleScalar(col 0:double, val 2.0) -> 3:double) -> 4:int) -> 3:double
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -3862,11 +3835,10 @@ STAGE PLANS:
      Map-reduce partition columns: (UDFToDouble(_col0) * 2.0D) (type: double)
      Reduce Sink Vectorization:
          className: VectorReduceSinkMultiKeyOperator
-         keyColumnNums: [6]
+         keyColumns: 6:double
          keyExpressions: DoubleColMultiplyDoubleScalar(col 5:double, val 2.0)(children: CastStringToDouble(col 3:string) -> 5:double) -> 6:double
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -3915,10 +3887,9 @@ STAGE PLANS:
      Map-reduce partition columns: _col0 (type: double)
      Reduce Sink Vectorization:
          className: VectorReduceSinkMultiKeyOperator
-         keyColumnNums: [0]
+         keyColumns: 0:double
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -4066,11 +4037,10 @@ STAGE PLANS:
      Map-reduce partition columns: UDFToDouble(_col0) (type: double)
      Reduce Sink Vectorization:
          className: VectorReduceSinkMultiKeyOperator
-         keyColumnNums: [5]
+         keyColumns: 5:double
          keyExpressions: CastStringToDouble(col 3:string) -> 5:double
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -4119,11 +4089,10 @@ STAGE PLANS:
      Map-reduce partition columns: UDFToDouble(UDFToInteger((_col0 / 2.0D))) (type: double)
      Reduce Sink Vectorization:
          className: VectorReduceSinkMultiKeyOperator
-         keyColumnNums: [3]
+         keyColumns: 3:double
          keyExpressions: CastLongToDouble(col 4:int)(children: CastDoubleToLong(col 3:double)(children: DoubleColDivideDoubleScalar(col 0:double, val 2.0) -> 3:double) -> 4:int) -> 3:double
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -4271,11 +4240,10 @@ STAGE PLANS:
      Map-reduce partition columns: (UDFToDouble(_col0) * 2.0D) (type: double)
      Reduce Sink Vectorization:
          className: VectorReduceSinkMultiKeyOperator
-         keyColumnNums: [6]
+         keyColumns: 6:double
          keyExpressions: DoubleColMultiplyDoubleScalar(col 5:double, val 2.0)(children: CastStringToDouble(col 3:string) -> 5:double) -> 6:double
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -4324,10 +4292,9 @@ STAGE PLANS:
      Map-reduce partition columns: _col0 (type: double)
      Reduce Sink Vectorization:
          className: VectorReduceSinkMultiKeyOperator
-         keyColumnNums: [0]
+         keyColumns: 0:double
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -4554,11 +4521,10 @@ STAGE PLANS:
      Map-reduce partition columns: UDFToString((UDFToDouble(_col0) * 2.0D)) (type: string)
      Reduce Sink Vectorization:
          className: VectorReduceSinkStringOperator
-         keyColumnNums: [7]
+         keyColumns: 7:string
          keyExpressions: CastDoubleToString(col 6:double)(children: DoubleColMultiplyDoubleScalar(col 5:double, val 2.0)(children: CastStringToDouble(col 3:string) -> 5:double) -> 6:double) -> 7:string
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -4607,11 +4573,10 @@ STAGE PLANS:
      Map-reduce partition columns: UDFToString(_col0) (type: string)
      Reduce Sink Vectorization:
          className: VectorReduceSinkStringOperator
-         keyColumnNums: [4]
+         keyColumns: 4:string
          keyExpressions: CastDoubleToString(col 0:double) -> 4:string
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 1 Data size: 94 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -4766,10 +4731,8 @@ STAGE PLANS:
      sort order: 
      Reduce Sink Vectorization:
          className: VectorReduceSinkEmptyKeyOperator
-         keyColumnNums: []
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -4821,10 +4784,9 @@ STAGE PLANS:
      Map-reduce partition columns: _col0 (type: string)
      Reduce Sink Vectorization:
          className: VectorReduceSinkStringOperator
-         keyColumnNums: [0]
+         keyColumns: 0:string
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -4942,10 +4904,8 @@ STAGE PLANS:
      sort order: 
      Reduce Sink Vectorization:
          className: VectorReduceSinkEmptyKeyOperator
-         keyColumnNums: []
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: NONE

  Stage: Stage-0
@@ -5018,10 +4978,9 @@ STAGE PLANS:
      sort order: 
      Reduce Sink Vectorization:
          className: VectorReduceSinkEmptyKeyOperator
-         keyColumnNums: []
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: [2, 3]
+         valueColumns: 2:string, 3:string
      Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
      value expressions: _col0 (type: string), _col1 (type: string)
    Execution mode: vectorized
@@ -5069,10 +5028,9 @@ STAGE PLANS:
      sort order: 
      Reduce Sink Vectorization:
          className: VectorReduceSinkEmptyKeyOperator
-         keyColumnNums: []
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: [0, 2]
+         valueColumns: 0:string, 2:string
      Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE
      value expressions: _col0 (type: string), _col2 (type: string)
    Execution mode: vectorized
@@ -5319,10 +5277,9 @@ STAGE PLANS:
      Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
      Reduce Sink Vectorization:
          className: VectorReduceSinkMultiKeyOperator
-         keyColumnNums: [2, 3]
+         keyColumns: 2:string, 3:string
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -5371,10 +5328,9 @@ STAGE PLANS:
      Map-reduce partition columns: _col0 (type: string), _col2 (type: string)
      Reduce Sink Vectorization:
          className: VectorReduceSinkMultiKeyOperator
-         keyColumnNums: [0, 2]
+         keyColumns: 0:string, 2:string
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 1 Data size: 360 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -5591,10 +5547,9 @@ STAGE PLANS:
      Map-reduce partition columns: _col0 (type: string)
      Reduce Sink Vectorization:
          className: VectorReduceSinkStringOperator
-         keyColumnNums: [2]
+         keyColumns: 2:string
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -5643,10 +5598,9 @@ STAGE PLANS:
      Map-reduce partition columns: _col0 (type: string)
      Reduce Sink Vectorization:
          className: VectorReduceSinkStringOperator
-         keyColumnNums: [0]
+         keyColumns: 0:string
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -5851,10 +5805,9 @@ STAGE PLANS:
      Map-reduce partition columns: _col0 (type: string)
      Reduce Sink Vectorization:
          className: VectorReduceSinkStringOperator
-         keyColumnNums: [0]
+         keyColumns: 0:string
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -5894,10 +5847,9 @@ STAGE PLANS:
      Map-reduce partition columns: _col0 (type: string)
      Reduce Sink Vectorization:
          className: VectorReduceSinkStringOperator
-         keyColumnNums: [2]
+         keyColumns: 2:string
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -6095,10 +6047,9 @@ STAGE PLANS:
      Map-reduce partition columns: _col0 (type: string)
      Reduce Sink Vectorization:
          className: VectorReduceSinkStringOperator
-         keyColumnNums: [2]
+         keyColumns: 2:string
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -6147,10 +6098,9 @@ STAGE PLANS:
      Map-reduce partition columns: _col0 (type: string)
      Reduce Sink Vectorization:
          className: VectorReduceSinkStringOperator
-         keyColumnNums: [0]
+         keyColumns: 0:string
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -6413,10 +6363,10 @@ STAGE PLANS:
      Map-reduce partition columns: _col0 (type: string)
      Reduce Sink Vectorization:
          className: VectorReduceSinkStringOperator
-         keyColumnNums: [2]
+         keyColumns: 2:string
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: [3]
+         valueColumns: 3:string
      Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE
      value expressions: _col1 (type: string)
    Execution mode: vectorized
@@ -6466,10 +6416,9 @@ STAGE PLANS:
      Map-reduce partition columns: _col0 (type: string)
      Reduce Sink Vectorization:
          className: VectorReduceSinkStringOperator
-         keyColumnNums: [0]
+         keyColumns: 0:string
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -6517,10 +6466,9 @@ STAGE PLANS:
      Map-reduce partition columns: _col0 (type: string)
      Reduce Sink Vectorization:
          className: VectorReduceSinkStringOperator
-         keyColumnNums: [0]
+         keyColumns: 0:string
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -6698,10 +6646,9 @@ STAGE PLANS:
      Map-reduce partition columns: _col0 (type: string)
      Reduce Sink Vectorization:
          className: VectorReduceSinkStringOperator
-         keyColumnNums: [0]
+         keyColumns: 0:string
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 1 Data size: 172 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -6749,10 +6696,10 @@ STAGE PLANS:
      Map-reduce partition columns: _col1 (type: string)
      Reduce Sink Vectorization:
          className: VectorReduceSinkStringOperator
-         keyColumnNums: [3]
+         keyColumns: 3:string
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: [2]
+         valueColumns: 2:string
      Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE
      value expressions: _col0 (type: string)
    Execution mode: vectorized
@@ -6799,10 +6746,9 @@ STAGE PLANS:
      Map-reduce partition columns: _col0 (type: string)
      Reduce Sink Vectorization:
          className: VectorReduceSinkStringOperator
-         keyColumnNums: [0]
+         keyColumns: 0:string
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -6976,10 +6922,9 @@ STAGE PLANS:
      sort order: 
      Reduce Sink Vectorization:
          className: VectorReduceSinkEmptyKeyOperator
-         keyColumnNums: []
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: [0]
+         valueColumns: 0:string
      Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
      value expressions: _col0 (type: string)
    Execution mode: vectorized
@@ -7031,10 +6976,9 @@ STAGE PLANS:
      sort order: 
      Reduce Sink Vectorization:
          className: VectorReduceSinkEmptyKeyOperator
-         keyColumnNums: []
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: [0]
+         valueColumns: 0:string
      Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
      value expressions: _col0 (type: string)
    Execution mode: vectorized
@@ -7227,10 +7171,9 @@ STAGE PLANS:
      Map-reduce partition columns: _col0 (type: string)
      Reduce Sink Vectorization:
          className: VectorReduceSinkStringOperator
-         keyColumnNums: [2]
+         keyColumns: 2:string
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -7281,10 +7224,9 @@ STAGE PLANS:
      sort order: 
      Reduce Sink Vectorization:
          className: VectorReduceSinkEmptyKeyOperator
-         keyColumnNums: []
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: [0]
+         valueColumns: 0:string
      Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
      value expressions: _col0 (type: string)
    Execution mode: vectorized
@@ -7336,10 +7278,9 @@ STAGE PLANS:
      sort order: 
      Reduce Sink Vectorization:
          className: VectorReduceSinkEmptyKeyOperator
-         keyColumnNums: []
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: [0]
+         valueColumns: 0:string
      Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
      value expressions: _col0 (type: string)
    Execution mode: vectorized
@@ -7473,10 +7414,9 @@ STAGE PLANS:
      Map-reduce partition columns: _col0 (type: string)
      Reduce Sink Vectorization:
          className: VectorReduceSinkStringOperator
-         keyColumnNums: [0]
+         keyColumns: 0:string
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
    Reducer 7
    Execution mode: vectorized
@@ -7531,10 +7471,9 @@ STAGE PLANS:
      Map-reduce partition columns: _col0 (type: string)
      Reduce Sink Vectorization:
          className: VectorReduceSinkStringOperator
-         keyColumnNums: [0]
+         keyColumns: 0:string
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE

  Stage: Stage-0
@@ -7613,10 +7552,9 @@ STAGE PLANS:
      sort order: 
      Reduce Sink Vectorization:
          className: VectorReduceSinkEmptyKeyOperator
-         keyColumnNums: []
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: [0]
+         valueColumns: 0:string
      Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
      value expressions: _col0 (type: string)
    Execution mode: vectorized
@@ -7668,10 +7606,9 @@ STAGE PLANS:
      sort order: 
      Reduce Sink Vectorization:
          className: VectorReduceSinkEmptyKeyOperator
-         keyColumnNums: []
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: [0]
+         valueColumns: 0:string
      Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
      value expressions: _col0 (type: string)
    Execution mode: vectorized
@@ -7864,10 +7801,9 @@ STAGE PLANS:
      Map-reduce partition columns: _col0 (type: string)
      Reduce Sink Vectorization:
          className: VectorReduceSinkStringOperator
-         keyColumnNums: [2]
+         keyColumns: 2:string
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -7918,10 +7854,9 @@ STAGE PLANS:
      sort order: 
      Reduce Sink Vectorization:
          className: VectorReduceSinkEmptyKeyOperator
-         keyColumnNums: []
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: [0]
+         valueColumns: 0:string
      Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
      value expressions: _col0 (type: string)
    Execution mode: vectorized
@@ -7973,10 +7908,9 @@ STAGE PLANS:
      sort order: 
      Reduce Sink Vectorization:
          className: VectorReduceSinkEmptyKeyOperator
-         keyColumnNums: []
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: [0]
+         valueColumns: 0:string
      Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
      value expressions: _col0 (type: string)
    Execution mode: vectorized
@@ -8112,10 +8046,9 @@ STAGE PLANS:
      Map-reduce partition columns: _col0 (type: string)
      Reduce Sink Vectorization:
          className: VectorReduceSinkStringOperator
-         keyColumnNums: [0]
+         keyColumns: 0:string
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE
    Reducer 7
    Execution mode: vectorized
@@ -8170,10 +8103,9 @@ STAGE PLANS:
      Map-reduce partition columns: _col0 (type: string)
      Reduce Sink Vectorization:
          className: VectorReduceSinkStringOperator
-         keyColumnNums: [0]
+         keyColumns: 0:string
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE

  Stage: Stage-0
@@ -8253,10 +8185,9 @@ STAGE PLANS:
      sort order: 
      Reduce Sink Vectorization:
          className: VectorReduceSinkEmptyKeyOperator
-         keyColumnNums: []
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: [0]
+         valueColumns: 0:string
      Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
      value expressions: _col0 (type: string)
    Execution mode: vectorized
@@ -8308,10 +8239,9 @@ STAGE PLANS:
      sort order: 
      Reduce Sink Vectorization:
          className: VectorReduceSinkEmptyKeyOperator
-         keyColumnNums: []
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: [0]
+         valueColumns: 0:string
      Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
      value expressions: _col0 (type: string)
    Execution mode: vectorized
@@ -8508,10 +8438,9 @@ STAGE PLANS:
      Map-reduce partition columns: _col0 (type: string)
      Reduce Sink Vectorization:
          className: VectorReduceSinkStringOperator
-         keyColumnNums: [0]
+         keyColumns: 0:string
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: []
      Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE
    Execution mode: vectorized
    Map Vectorization:
@@ -8562,10 +8491,9 @@ STAGE PLANS:
      sort order: 
      Reduce Sink Vectorization:
          className: VectorReduceSinkEmptyKeyOperator
-         keyColumnNums: []
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-         valueColumnNums: [0]
+         valueColumns: 0:string
      Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE
      value expressions: _col0 (type: string)
    Execution mode: vectorized
@@ -8617,10 +8545,9 @@ STAGE PLANS:
      sort order: 
      Reduce Sink Vectorization:
          className: VectorReduceSinkEmptyKeyOperator
-         keyColumnNums: []
          native: true
          nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS
true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:string Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized @@ -8674,10 +8601,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Reducer 3 Reduce Vectorization: @@ -8754,10 +8680,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Reducer 9 Execution mode: vectorized @@ -8812,10 +8737,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 @@ -8965,10 +8889,13 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [2] + bigTableKeyColumns: 2:string + bigTableRetainColumnNums: [] className: VectorMapJoinInnerBigOnlyStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + hashTableImplementationType: OPTIMIZED input vertices: 1 Map 3 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE @@ -8988,10 +8915,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -9208,11 +9134,14 @@ 
STAGE PLANS: 0 day(_col0) (type: int) 1 day(_col0) (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [5] + bigTableKeyColumns: 5:int bigTableKeyExpressions: VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 5:int + bigTableRetainColumnNums: [] className: VectorMapJoinInnerBigOnlyLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + hashTableImplementationType: OPTIMIZED input vertices: 1 Map 3 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE @@ -9232,10 +9161,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -9515,13 +9443,15 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [2] - bigTableRetainedColumnNums: [3] - bigTableValueColumnNums: [3] + bigTableKeyColumns: 2:string + bigTableRetainColumnNums: [3] + bigTableValueColumns: 3:string className: VectorMapJoinInnerBigOnlyStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [3] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 3:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col1 input vertices: 1 Map 3 @@ -9533,10 +9463,13 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col0 (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [3] + bigTableKeyColumns: 3:string + bigTableRetainColumnNums: [] className: VectorMapJoinInnerBigOnlyStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + hashTableImplementationType: OPTIMIZED input vertices: 1 Map 4 Statistics: Num rows: 2420 Data size: 25709 Basic stats: COMPLETE Column stats: NONE @@ -9556,10 +9489,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column 
stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -9801,10 +9733,13 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: string) 1 _col0 (type: string), _col2 (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [2, 3] + bigTableKeyColumns: 2:string, 3:string + bigTableRetainColumnNums: [] className: VectorMapJoinInnerBigOnlyMultiKeyOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + hashTableImplementationType: OPTIMIZED input vertices: 1 Map 3 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE @@ -9824,10 +9759,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -10043,10 +9977,13 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [2] + bigTableKeyColumns: 2:string + bigTableRetainColumnNums: [] className: VectorMapJoinInnerBigOnlyStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + hashTableImplementationType: OPTIMIZED input vertices: 1 Map 3 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE @@ -10066,10 +10003,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -10277,11 +10213,14 @@ STAGE PLANS: 0 UDFToDouble(_col0) (type: double) 1 UDFToDouble(UDFToInteger((_col0 / 2.0D))) (type: double) Map Join Vectorization: - bigTableKeyColumnNums: [5] + bigTableKeyColumns: 5:double bigTableKeyExpressions: CastStringToDouble(col 3:string) -> 5:double + bigTableRetainColumnNums: [] className: VectorMapJoinInnerBigOnlyMultiKeyOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + 
nonOuterSmallTableKeyMapping: [] + hashTableImplementationType: OPTIMIZED input vertices: 1 Map 3 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE @@ -10301,10 +10240,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -10511,11 +10449,14 @@ STAGE PLANS: 0 (UDFToDouble(_col0) * 2.0D) (type: double) 1 _col0 (type: double) Map Join Vectorization: - bigTableKeyColumnNums: [6] + bigTableKeyColumns: 6:double bigTableKeyExpressions: DoubleColMultiplyDoubleScalar(col 5:double, val 2.0)(children: CastStringToDouble(col 3:string) -> 5:double) -> 6:double + bigTableRetainColumnNums: [] className: VectorMapJoinInnerBigOnlyMultiKeyOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + hashTableImplementationType: OPTIMIZED input vertices: 1 Map 3 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE @@ -10535,10 +10476,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -10687,10 +10627,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1000 Data size: 10624 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -10781,9 +10720,12 @@ STAGE PLANS: 0 1 Map Join Vectorization: + bigTableRetainColumnNums: [] className: VectorMapJoinInnerBigOnlyMultiKeyOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + hashTableImplementationType: OPTIMIZED input vertices: 1 Reducer 4 Statistics: Num rows: 500000 Data size: 11124000 
Basic stats: COMPLETE Column stats: NONE @@ -10803,10 +10745,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -11017,10 +10958,13 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [2] + bigTableKeyColumns: 2:string + bigTableRetainColumnNums: [] className: VectorMapJoinInnerBigOnlyStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + hashTableImplementationType: OPTIMIZED input vertices: 1 Map 3 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE @@ -11040,10 +10984,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -11208,10 +11151,12 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [0] + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [] className: VectorMapJoinOuterStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED input vertices: 1 Map 3 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE @@ -11231,10 +11176,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -11398,10 +11342,12 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [0] + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [] className: 
VectorMapJoinOuterStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED input vertices: 0 Map 1 Statistics: Num rows: 2200 Data size: 23372 Basic stats: COMPLETE Column stats: NONE @@ -11421,10 +11367,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -11684,13 +11629,15 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col0 (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [2] - bigTableRetainedColumnNums: [3] - bigTableValueColumnNums: [3] + bigTableKeyColumns: 2:string + bigTableRetainColumnNums: [3] + bigTableValueColumns: 3:string className: VectorMapJoinInnerBigOnlyStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [3] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 3:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col1 input vertices: 1 Map 3 @@ -11702,10 +11649,13 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col0 (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [3] + bigTableKeyColumns: 3:string + bigTableRetainColumnNums: [] className: VectorMapJoinInnerBigOnlyStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + hashTableImplementationType: OPTIMIZED input vertices: 1 Map 4 Statistics: Num rows: 1210 Data size: 12854 Basic stats: COMPLETE Column stats: NONE @@ -11725,10 +11675,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -11968,12 +11917,15 @@ STAGE PLANS: 0 _col0 (type: string) 1 _col1 (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [0] + bigTableKeyColumns: 0:string + bigTableRetainColumnNums: [] className: 
VectorMapJoinInnerStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [4] - smallTableMapping: [4] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 4:string + smallTableValueMapping: 4:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col1 input vertices: 1 Map 3 @@ -11985,10 +11937,13 @@ STAGE PLANS: 0 _col1 (type: string) 1 _col0 (type: string) Map Join Vectorization: - bigTableKeyColumnNums: [4] + bigTableKeyColumns: 4:string + bigTableRetainColumnNums: [] className: VectorMapJoinInnerBigOnlyStringOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + nonOuterSmallTableKeyMapping: [] + hashTableImplementationType: OPTIMIZED input vertices: 1 Map 4 Statistics: Num rows: 1 Data size: 207 Basic stats: PARTIAL Column stats: NONE @@ -12008,10 +11963,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -12145,10 +12099,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:string Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized @@ -12200,10 +12153,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:string Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized @@ -12396,10 +12348,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [2] + keyColumns: 2:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2000 Data size: 21248 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -12450,10 +12401,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:string Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized @@ -12505,10 +12455,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:string Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) Execution mode: vectorized @@ -12644,10 +12593,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Reducer 7 Execution mode: vectorized @@ -12702,10 +12650,9 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkStringOperator - keyColumnNums: [0] + keyColumns: 0:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 2 Data size: 368 Basic stats: COMPLETE Column stats: NONE Stage: Stage-0 diff --git ql/src/test/results/clientpositive/spark/subquery_scalar.q.out ql/src/test/results/clientpositive/spark/subquery_scalar.q.out index 7488f2e..34c4223 100644 --- ql/src/test/results/clientpositive/spark/subquery_scalar.q.out +++ ql/src/test/results/clientpositive/spark/subquery_scalar.q.out @@ -1477,7 +1477,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (100 < _col1) (type: boolean) + predicate: (_col1 > 100) (type: boolean) Statistics: Num rows: 4 Data size: 484 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string) @@ -2988,7 +2988,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Left Outer Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col2 (type: int) 1 _col2 (type: int) @@ -3083,8 +3083,7 @@ POSTHOOK: Input: default@part 85768 86428 
90681 -Warning: Shuffle Join JOIN[10][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product -Warning: Shuffle Join JOIN[27][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 3' is a cross product +Warning: Shuffle Join JOIN[23][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 2' is a cross product PREHOOK: query: explain select * from part_null where p_name NOT LIKE (select min(p_name) from part_null) AND p_brand NOT IN (select p_name from part) PREHOOK: type: QUERY POSTHOOK: query: explain select * from part_null where p_name NOT LIKE (select min(p_name) from part_null) AND p_brand NOT IN (select p_name from part) @@ -3097,12 +3096,11 @@ STAGE PLANS: Stage: Stage-1 Spark Edges: - Reducer 10 <- Map 9 (GROUP, 2) - Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 6 (PARTITION-LEVEL SORT, 1) - Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 1), Reducer 8 (PARTITION-LEVEL SORT, 1) - Reducer 4 <- Reducer 10 (PARTITION-LEVEL SORT, 2), Reducer 3 (PARTITION-LEVEL SORT, 2) - Reducer 6 <- Map 5 (GROUP, 1) - Reducer 8 <- Map 7 (GROUP, 1) + Reducer 2 <- Map 1 (PARTITION-LEVEL SORT, 1), Reducer 5 (PARTITION-LEVEL SORT, 1), Reducer 7 (PARTITION-LEVEL SORT, 1) + Reducer 3 <- Reducer 2 (PARTITION-LEVEL SORT, 2), Reducer 9 (PARTITION-LEVEL SORT, 2) + Reducer 5 <- Map 4 (GROUP, 1) + Reducer 7 <- Map 6 (GROUP, 1) + Reducer 9 <- Map 8 (GROUP, 2) #### A masked pattern was here #### Vertices: Map 1 @@ -3118,7 +3116,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 32560 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Map 5 + Map 4 Map Operator Tree: TableScan alias: part_null @@ -3136,7 +3134,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Map 7 + Map 6 Map Operator Tree: TableScan alias: part @@ -3154,7 +3152,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) - Map 9 + Map 8 Map Operator Tree: TableScan alias: part @@ -3173,61 +3171,28 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: string) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE - Reducer 10 - Reduce Operator Tree: - Group By Operator - keys: KEY._col0 (type: string) - mode: mergepartial - outputColumnNames: _col0 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: string), true (type: boolean) - outputColumnNames: _col0, _col1 - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - key expressions: _col0 (type: string) - sort order: + - Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: boolean) - Reducer 2 + Reducer 2 Reduce Operator Tree: Join Operator condition map: - Left Outer Join 0 to 1 + Inner Join 0 to 1 + Inner Join 0 to 2 keys: 0 1 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 1 Data size: 32745 Basic stats: COMPLETE Column stats: NONE + 2 + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, 
_col8, _col9, _col10, _col11 + Statistics: Num rows: 1 Data size: 32761 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (not (_col1 like _col9)) (type: boolean) - Statistics: Num rows: 1 Data size: 32745 Basic stats: COMPLETE Column stats: NONE - Select Operator - expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 1 Data size: 32745 Basic stats: COMPLETE Column stats: NONE - Reduce Output Operator - sort order: - Statistics: Num rows: 1 Data size: 32745 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) - Reducer 3 - Reduce Operator Tree: - Join Operator - condition map: - Inner Join 0 to 1 - keys: - 0 - 1 - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11 - Statistics: Num rows: 1 Data size: 32762 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 32761 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col3 (type: string) sort order: + Map-reduce partition columns: _col3 (type: string) - Statistics: Num rows: 1 Data size: 32762 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 32761 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string), _col10 (type: bigint), _col11 (type: bigint) - Reducer 4 + Reducer 3 Reduce Operator Tree: Join Operator condition map: @@ -3238,7 +3203,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col10, _col11, _col13 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: ((_col10 = 0) or (_col13 is null and _col3 is not null and (_col11 >= _col10))) (type: boolean) + predicate: ((_col10 = 0L) or (_col13 is null and _col3 is not null and (_col11 >= _col10))) (type: boolean) Statistics: Num rows: 9 Data size: 1112 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) @@ -3251,7 +3216,7 @@ STAGE PLANS: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Reducer 6 + Reducer 5 Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0) @@ -3262,7 +3227,7 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: string) - Reducer 8 + Reducer 7 Reduce Operator Tree: Group By Operator aggregations: count(VALUE._col0), count(VALUE._col1) @@ -3273,6 +3238,23 @@ STAGE PLANS: sort order: Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) + Reducer 9 + Reduce Operator 
Tree: + Group By Operator + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: string), true (type: boolean) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 13 Data size: 1573 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: boolean) Stage: Stage-0 Fetch Operator @@ -3280,8 +3262,7 @@ STAGE PLANS: Processor Tree: ListSink -Warning: Shuffle Join JOIN[10][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 2' is a cross product -Warning: Shuffle Join JOIN[27][tables = [$hdt$_0, $hdt$_1]] in Work 'Reducer 3' is a cross product +Warning: Shuffle Join JOIN[23][tables = [$hdt$_0, $hdt$_1, $hdt$_2]] in Work 'Reducer 2' is a cross product PREHOOK: query: select * from part_null where p_name NOT LIKE (select min(p_name) from part_null) AND p_brand NOT IN (select p_name from part) PREHOOK: type: QUERY PREHOOK: Input: default@part @@ -3453,7 +3434,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col12 Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (not CASE WHEN ((_col9 = 0)) THEN (false) WHEN (_col12 is not null) THEN (true) WHEN (_col3 is null) THEN (null) WHEN ((_col10 < _col9)) THEN (true) ELSE (false) END) (type: boolean) + predicate: (not CASE WHEN ((_col9 = 0L)) THEN (false) WHEN (_col12 is not null) THEN (true) WHEN (_col3 is null) THEN (null) WHEN ((_col10 < _col9)) THEN (true) ELSE (false) END) (type: boolean) Statistics: Num rows: 7 Data size: 865 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: double), _col8 (type: string) @@ -3469,7 +3450,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Left Outer Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col4 (type: string) 1 _col2 (type: string) @@ -4127,7 +4108,7 @@ STAGE PLANS: alias: part Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Filter Operator - predicate: (p_name is not null and p_type is not null) (type: boolean) + predicate: p_type is not null (type: boolean) Statistics: Num rows: 26 Data size: 3147 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: p_name (type: string), p_brand (type: string), p_type (type: string) @@ -4180,7 +4161,7 @@ STAGE PLANS: Reduce Operator Tree: Join Operator condition map: - Left Outer Join 0 to 1 + Inner Join 0 to 1 keys: 0 _col2 (type: string) 1 _col2 (type: string) @@ -4192,6 +4173,9 @@ STAGE PLANS: Select Operator expressions: _col0 (type: string), _col2 (type: string) outputColumnNames: _col0, _col1 + Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (_col0 is not null and _col1 is not null) (type: boolean) Statistics: Num rows: 14 Data size: 1730 Basic stats: COMPLETE Column stats: NONE Group By Operator keys: _col0 (type: string), _col1 (type: string) @@ -6109,7 +6093,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 7 Basic 
stats: COMPLETE Column stats: NONE Filter Operator - predicate: (0.0 = _col1) (type: boolean) + predicate: (_col1 = 0.0D) (type: boolean) Statistics: Num rows: 1 Data size: 7 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: int) @@ -6344,7 +6328,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Filter Operator - predicate: (0 = _col1) (type: boolean) + predicate: (_col1 = 0) (type: boolean) Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE Select Operator expressions: _col0 (type: int) diff --git ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out index 98c709c..a9f5047 100644 --- ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out +++ ql/src/test/results/clientpositive/spark/vector_decimal_aggregate.q.out @@ -99,10 +99,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3, 4, 5, 6, 7, 8, 9] + valueColumns: 1:bigint, 2:decimal(20,10), 3:decimal(20,10), 4:decimal(30,10), 5:bigint, 6:decimal(23,14), 7:decimal(23,14), 8:decimal(33,14), 9:bigint Statistics: Num rows: 12289 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: bigint), _col6 (type: decimal(23,14)), _col7 (type: decimal(23,14)), _col8 (type: decimal(33,14)), _col9 (type: bigint) Execution mode: vectorized @@ -276,10 +276,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] + valueColumns: 1:bigint, 2:decimal(20,10), 3:decimal(20,10), 4:decimal(30,10), 5:struct, 6:struct, 7:struct, 8:bigint, 9:decimal(23,14), 10:decimal(23,14), 11:decimal(33,14), 12:struct, 13:struct, 14:struct, 15:bigint Statistics: Num rows: 12289 Data size: 2165060 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(20,10)), _col3 (type: decimal(20,10)), _col4 (type: decimal(30,10)), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: bigint), _col9 (type: decimal(23,14)), _col10 (type: decimal(23,14)), _col11 (type: decimal(33,14)), _col12 (type: struct), _col13 (type: struct), _col14 (type: struct), _col15 (type: bigint) Execution mode: vectorized @@ -459,10 +459,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, 
hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3, 4, 5, 6, 7, 8, 9] + valueColumns: 1:bigint, 2:decimal(11,5), 3:decimal(11,5), 4:decimal(21,5), 5:bigint, 6:decimal(16,0), 7:decimal(16,0), 8:decimal(26,0), 9:bigint Statistics: Num rows: 12289 Data size: 346472 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(11,5)), _col3 (type: decimal(11,5)), _col4 (type: decimal(21,5)), _col5 (type: bigint), _col6 (type: decimal(16,0)), _col7 (type: decimal(16,0)), _col8 (type: decimal(26,0)), _col9 (type: bigint) Execution mode: vectorized @@ -655,10 +655,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] + valueColumns: 1:bigint, 2:decimal(11,5), 3:decimal(11,5), 4:decimal(21,5), 5:struct, 6:struct, 7:struct, 8:bigint, 9:decimal(16,0), 10:decimal(16,0), 11:decimal(26,0), 12:struct, 13:struct, 14:struct, 15:bigint Statistics: Num rows: 12289 Data size: 346472 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: decimal(11,5)), _col3 (type: decimal(11,5)), _col4 (type: decimal(21,5)), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: bigint), _col9 (type: decimal(16,0)), _col10 (type: decimal(16,0)), _col11 (type: decimal(26,0)), _col12 (type: struct), _col13 (type: struct), _col14 (type: struct), _col15 (type: bigint) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vector_inner_join.q.out ql/src/test/results/clientpositive/spark/vector_inner_join.q.out index 168aa77..afe3b1f 100644 --- ql/src/test/results/clientpositive/spark/vector_inner_join.q.out +++ ql/src/test/results/clientpositive/spark/vector_inner_join.q.out @@ -135,13 +135,15 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0] - bigTableValueColumnNums: [0] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int className: VectorMapJoinInnerBigOnlyLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int + hashTableImplementationType: OPTIMIZED outputColumnNames: _col1 input vertices: 0 Map 1 @@ -315,13 +317,15 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0] - bigTableValueColumnNums: [0] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:int className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, 
hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0 input vertices: 1 Map 2 @@ -511,13 +515,15 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col1 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [] className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [3, 0] - smallTableMapping: [3] + nonOuterSmallTableKeyMapping: [0] + projectedOutput: 3:string, 0:int + smallTableValueMapping: 3:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col1, _col2 input vertices: 1 Map 2 @@ -679,14 +685,16 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [1] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] + bigTableKeyColumns: 1:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:string, 1:int className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1, 1, 3] - smallTableMapping: [3] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:string, 1:int, 1:int, 3:string + smallTableValueMapping: 3:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 2 @@ -840,14 +848,16 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col1 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:string className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1, 3, 0] - smallTableMapping: [3] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int, 1:string, 3:string, 0:int + smallTableValueMapping: 3:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 2 @@ -1010,14 +1020,16 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col1 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:string className: VectorMapJoinInnerLongOperator native: true 
nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1, 3] - smallTableMapping: [3] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:int, 1:string, 3:string + smallTableValueMapping: 3:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2 input vertices: 1 Map 2 @@ -1179,14 +1191,16 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col1 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [1] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [1] + bigTableValueColumns: 1:string className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [1, 3, 0] - smallTableMapping: [3] + nonOuterSmallTableKeyMapping: [0] + projectedOutput: 1:string, 3:string, 0:int + smallTableValueMapping: 3:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col1, _col2, _col3 input vertices: 1 Map 2 @@ -1348,14 +1362,16 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [1] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0] + bigTableKeyColumns: 1:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:string className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1, 3] - smallTableMapping: [3] + nonOuterSmallTableKeyMapping: [1] + projectedOutput: 0:string, 1:int, 3:string + smallTableValueMapping: 3:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col2, _col3 input vertices: 1 Map 2 @@ -1517,14 +1533,16 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [1] - bigTableRetainedColumnNums: [0, 1] - bigTableValueColumnNums: [0, 1] + bigTableKeyColumns: 1:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:string, 1:int className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1, 3] - smallTableMapping: [3] + nonOuterSmallTableKeyMapping: [] + projectedOutput: 0:string, 1:int, 3:string + smallTableValueMapping: 3:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col3 input vertices: 1 Map 2 diff --git ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out 
index ff1af2c..becf70d 100644 --- ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out +++ ql/src/test/results/clientpositive/spark/vector_mapjoin_reduce.q.out @@ -203,6 +203,7 @@ STAGE PLANS: className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col3 input vertices: 1 Map 3 @@ -217,6 +218,7 @@ STAGE PLANS: className: VectorMapJoinLeftSemiLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col3 input vertices: 1 Map 4 @@ -477,6 +479,7 @@ STAGE PLANS: className: VectorMapJoinInnerLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col3, _col4 input vertices: 1 Map 3 @@ -491,6 +494,7 @@ STAGE PLANS: className: VectorMapJoinLeftSemiMultiKeyOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col3 input vertices: 1 Map 4 diff --git ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out index bc9d102..b770a49 100644 --- ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out +++ ql/src/test/results/clientpositive/spark/vector_outer_join0.q.out @@ -151,15 +151,16 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [1] - bigTableOuterKeyMapping: 1 -> 3 - bigTableRetainedColumnNums: [0, 1, 3] - bigTableValueColumnNums: [0, 1] + bigTableKeyColumns: 1:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:string, 1:int className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1, 3, 4] - smallTableMapping: [4] + outerSmallTableKeyMapping: 1 -> 3 + projectedOutput: 0:string, 1:int, 3:int, 4:string + smallTableValueMapping: 4:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 1 Map 2 @@ -304,15 +305,16 @@ STAGE PLANS: 0 _col1 
(type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableOuterKeyMapping: 0 -> 4 - bigTableRetainedColumnNums: [0, 1, 4] - bigTableValueColumnNums: [0, 1] + bigTableKeyColumns: 0:int + bigTableRetainColumnNums: [0, 1] + bigTableValueColumns: 0:int, 1:string className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [3, 4, 0, 1] - smallTableMapping: [3] + outerSmallTableKeyMapping: 0 -> 4 + projectedOutput: 3:string, 4:int, 0:int, 1:string + smallTableValueMapping: 3:string + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3 input vertices: 0 Map 1 diff --git ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out index 9a1fa53..7f52b14 100644 --- ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out +++ ql/src/test/results/clientpositive/spark/vector_outer_join1.q.out @@ -311,15 +311,16 @@ STAGE PLANS: 0 _col2 (type: int) 1 _col2 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [2] - bigTableOuterKeyMapping: 2 -> 15 - bigTableRetainedColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 15] - bigTableValueColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + bigTableKeyColumns: 2:int + bigTableRetainColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] + bigTableValueColumns: 0:tinyint, 1:smallint, 2:int, 3:bigint, 4:float, 5:double, 6:string, 7:string, 8:timestamp, 9:timestamp, 10:boolean, 11:boolean className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24] - smallTableMapping: [13, 14, 16, 17, 18, 19, 20, 21, 22, 23, 24] + outerSmallTableKeyMapping: 2 -> 15 + projectedOutput: 0:tinyint, 1:smallint, 2:int, 3:bigint, 4:float, 5:double, 6:string, 7:string, 8:timestamp, 9:timestamp, 10:boolean, 11:boolean, 13:tinyint, 14:smallint, 15:int, 16:bigint, 17:float, 18:double, 19:string, 20:string, 21:timestamp, 22:timestamp, 23:boolean, 24:boolean + smallTableValueMapping: 13:tinyint, 14:smallint, 16:bigint, 17:float, 18:double, 19:string, 20:string, 21:timestamp, 22:timestamp, 23:boolean, 24:boolean + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 input vertices: 1 Map 2 @@ -487,13 +488,14 @@ STAGE PLANS: 0 _col0 (type: tinyint) 1 _col0 (type: tinyint) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0] - bigTableValueColumnNums: [0] + bigTableKeyColumns: 0:tinyint + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:tinyint className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: 
hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0] + projectedOutput: 0:tinyint + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0 input vertices: 1 Map 2 @@ -796,13 +798,14 @@ STAGE PLANS: 0 _col1 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [2] - bigTableRetainedColumnNums: [0] - bigTableValueColumnNums: [0] + bigTableKeyColumns: 2:int + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:tinyint className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0] + projectedOutput: 0:tinyint + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0 input vertices: 1 Map 3 @@ -814,13 +817,14 @@ STAGE PLANS: 0 _col0 (type: tinyint) 1 _col0 (type: tinyint) Map Join Vectorization: - bigTableKeyColumnNums: [0] - bigTableRetainedColumnNums: [0] - bigTableValueColumnNums: [0] + bigTableKeyColumns: 0:tinyint + bigTableRetainColumnNums: [0] + bigTableValueColumns: 0:tinyint className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [0] + projectedOutput: 0:tinyint + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0 input vertices: 1 Map 4 @@ -841,10 +845,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1] + valueColumns: 0:bigint, 1:bigint Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out index 32bcc9b..8776659 100644 --- ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out +++ ql/src/test/results/clientpositive/spark/vector_outer_join2.q.out @@ -370,13 +370,14 @@ STAGE PLANS: 0 _col0 (type: int) 1 _col0 (type: int) Map Join Vectorization: - bigTableKeyColumnNums: [2] - bigTableRetainedColumnNums: [3] - bigTableValueColumnNums: [3] + bigTableKeyColumns: 2:int + bigTableRetainColumnNums: [3] + bigTableValueColumns: 3:bigint className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, 
hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [3] + projectedOutput: 3:bigint + hashTableImplementationType: OPTIMIZED outputColumnNames: _col1 input vertices: 1 Map 3 @@ -388,13 +389,14 @@ STAGE PLANS: 0 _col1 (type: bigint) 1 _col0 (type: bigint) Map Join Vectorization: - bigTableKeyColumnNums: [3] - bigTableRetainedColumnNums: [3] - bigTableValueColumnNums: [3] + bigTableKeyColumns: 3:bigint + bigTableRetainColumnNums: [3] + bigTableValueColumns: 3:bigint className: VectorMapJoinOuterLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true - projectedOutputColumnNums: [3] + projectedOutput: 3:bigint + hashTableImplementationType: OPTIMIZED outputColumnNames: _col1 input vertices: 1 Map 4 @@ -415,10 +417,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1] + valueColumns: 0:bigint, 1:bigint Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vectorization_0.q.out ql/src/test/results/clientpositive/spark/vectorization_0.q.out index ec73876..01e77fc 100644 --- ql/src/test/results/clientpositive/spark/vectorization_0.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_0.q.out @@ -62,10 +62,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3] + valueColumns: 0:tinyint, 1:tinyint, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: tinyint), _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized @@ -117,10 +116,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:tinyint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3] + valueColumns: 1:tinyint, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 @@ -241,10 +240,9 @@ STAGE 
PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -296,10 +294,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized @@ -562,10 +559,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3] + valueColumns: 0:bigint, 1:bigint, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized @@ -617,10 +613,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3] + valueColumns: 1:bigint, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 @@ -741,10 +737,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized @@ -796,10 +791,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:bigint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS 
true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized @@ -1062,10 +1056,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3] + valueColumns: 0:float, 1:float, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: float), _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint) Execution mode: vectorized @@ -1117,10 +1110,10 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:float native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 3] + valueColumns: 1:float, 2:bigint, 3:bigint Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint) Reducer 3 @@ -1241,10 +1234,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:double Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: double) Execution mode: vectorized @@ -1296,10 +1288,9 @@ STAGE PLANS: sort order: + Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0] + keyColumns: 0:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE Reducer 3 Execution mode: vectorized @@ -1607,10 +1598,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4, 5] + valueColumns: 0:struct, 1:struct, 2:struct, 3:bigint, 4:double, 5:tinyint Statistics: Num rows: 1 Data size: 260 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: bigint), _col4 (type: double), _col5 (type: tinyint) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vectorization_1.q.out 
ql/src/test/results/clientpositive/spark/vectorization_1.q.out index a5d4a14..8f1ca80 100644 --- ql/src/test/results/clientpositive/spark/vectorization_1.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_1.q.out @@ -94,10 +94,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4, 5] + valueColumns: 0:struct, 1:double, 2:tinyint, 3:int, 4:struct, 5:bigint Statistics: Num rows: 1 Data size: 184 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: double), _col2 (type: tinyint), _col3 (type: int), _col4 (type: struct), _col5 (type: bigint) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vectorization_12.q.out ql/src/test/results/clientpositive/spark/vectorization_12.q.out index 55e3ad6..f1460f0 100644 --- ql/src/test/results/clientpositive/spark/vectorization_12.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_12.q.out @@ -121,10 +121,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: double), _col1 (type: bigint), _col2 (type: string), _col3 (type: boolean) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2, 3] + keyColumns: 0:double, 1:bigint, 2:string, 3:boolean native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [4, 5, 6, 7, 8] + valueColumns: 4:bigint, 5:struct, 6:struct, 7:bigint, 8:struct Statistics: Num rows: 3754 Data size: 807123 Basic stats: COMPLETE Column stats: NONE value expressions: _col4 (type: bigint), _col5 (type: struct), _col6 (type: struct), _col7 (type: bigint), _col8 (type: struct) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vectorization_13.q.out ql/src/test/results/clientpositive/spark/vectorization_13.q.out index de501e7..655cf25 100644 --- ql/src/test/results/clientpositive/spark/vectorization_13.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_13.q.out @@ -123,10 +123,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2, 3, 4] + keyColumns: 0:boolean, 1:tinyint, 2:timestamp, 3:float, 4:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [5, 6, 7, 8, 9, 10] + valueColumns: 5:tinyint, 6:double, 7:struct, 8:struct, 9:float, 10:tinyint Statistics: Num rows: 2730 Data size: 586959 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: struct), _col8 (type: struct), _col9 (type: float), _col10 (type: tinyint) Execution mode: vectorized diff --git 
ql/src/test/results/clientpositive/spark/vectorization_14.q.out ql/src/test/results/clientpositive/spark/vectorization_14.q.out index b583cee..f2aed63 100644 --- ql/src/test/results/clientpositive/spark/vectorization_14.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_14.q.out @@ -124,10 +124,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2, 3, 4] + keyColumns: 0:string, 1:float, 2:double, 3:timestamp, 4:boolean native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [5, 6, 7, 8, 9, 10] + valueColumns: 5:struct, 6:float, 7:struct, 8:bigint, 9:struct, 10:struct Statistics: Num rows: 606 Data size: 130292 Basic stats: COMPLETE Column stats: NONE value expressions: _col5 (type: struct), _col6 (type: float), _col7 (type: struct), _col8 (type: bigint), _col9 (type: struct), _col10 (type: struct) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vectorization_15.q.out ql/src/test/results/clientpositive/spark/vectorization_15.q.out index 70aacfc..2342ced 100644 --- ql/src/test/results/clientpositive/spark/vectorization_15.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_15.q.out @@ -119,10 +119,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2, 3, 4, 5, 6] + keyColumns: 0:float, 1:boolean, 2:double, 3:string, 4:tinyint, 5:int, 6:timestamp native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [7, 8, 9, 10, 11, 12] + valueColumns: 7:struct, 8:double, 9:struct, 10:struct, 11:struct, 12:struct Statistics: Num rows: 12288 Data size: 2641964 Basic stats: COMPLETE Column stats: NONE value expressions: _col7 (type: struct), _col8 (type: double), _col9 (type: struct), _col10 (type: struct), _col11 (type: struct), _col12 (type: struct) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vectorization_16.q.out ql/src/test/results/clientpositive/spark/vectorization_16.q.out index 61d1345..15df356 100644 --- ql/src/test/results/clientpositive/spark/vectorization_16.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_16.q.out @@ -96,10 +96,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:double, 1:string, 2:timestamp native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3, 4, 5] + valueColumns: 
3:bigint, 4:struct, 5:double Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: double) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vectorization_17.q.out ql/src/test/results/clientpositive/spark/vectorization_17.q.out index c333ab1..b2c5382 100644 --- ql/src/test/results/clientpositive/spark/vectorization_17.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_17.q.out @@ -90,10 +90,10 @@ STAGE PLANS: sort order: ++ Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [3, 4] + keyColumns: 3:bigint, 4:float native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [6, 2, 8, 5, 15, 16, 14, 17, 19, 20, 22, 18] + valueColumns: 6:string, 2:int, 8:timestamp, 5:double, 15:double, 16:bigint, 14:double, 17:double, 19:double, 20:double, 22:decimal(11,4), 18:double Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: timestamp), _col4 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: decimal(11,4)), _col13 (type: double) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vectorization_2.q.out ql/src/test/results/clientpositive/spark/vectorization_2.q.out index f1ee936..2aee6cd 100644 --- ql/src/test/results/clientpositive/spark/vectorization_2.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_2.q.out @@ -98,10 +98,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4, 5] + valueColumns: 0:struct, 1:double, 2:struct, 3:bigint, 4:tinyint, 5:struct Statistics: Num rows: 1 Data size: 256 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: double), _col2 (type: struct), _col3 (type: bigint), _col4 (type: tinyint), _col5 (type: struct) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vectorization_3.q.out ql/src/test/results/clientpositive/spark/vectorization_3.q.out index c78de72..249d4ed 100644 --- ql/src/test/results/clientpositive/spark/vectorization_3.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_3.q.out @@ -103,10 +103,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4, 5] + valueColumns: 0:struct, 1:struct, 2:struct, 3:double, 4:struct, 5:struct Statistics: Num rows: 1 Data size: 404 Basic stats: COMPLETE 
Column stats: NONE value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: double), _col4 (type: struct), _col5 (type: struct) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vectorization_4.q.out ql/src/test/results/clientpositive/spark/vectorization_4.q.out index c924651..55d4ff6 100644 --- ql/src/test/results/clientpositive/spark/vectorization_4.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_4.q.out @@ -98,10 +98,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4] + valueColumns: 0:bigint, 1:struct, 2:struct, 3:struct, 4:tinyint Statistics: Num rows: 1 Data size: 252 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: tinyint) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vectorization_5.q.out ql/src/test/results/clientpositive/spark/vectorization_5.q.out index 4cf4548..4494a1c 100644 --- ql/src/test/results/clientpositive/spark/vectorization_5.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_5.q.out @@ -92,10 +92,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1, 2, 3, 4] + valueColumns: 0:smallint, 1:bigint, 2:smallint, 3:bigint, 4:tinyint Statistics: Num rows: 1 Data size: 28 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: smallint), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: tinyint) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vectorization_9.q.out ql/src/test/results/clientpositive/spark/vectorization_9.q.out index 61d1345..15df356 100644 --- ql/src/test/results/clientpositive/spark/vectorization_9.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_9.q.out @@ -96,10 +96,10 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: double), _col1 (type: string), _col2 (type: timestamp) Reduce Sink Vectorization: className: VectorReduceSinkMultiKeyOperator - keyColumnNums: [0, 1, 2] + keyColumns: 0:double, 1:string, 2:timestamp native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [3, 4, 5] + valueColumns: 3:bigint, 4:struct, 5:double Statistics: Num rows: 4096 Data size: 880654 Basic stats: COMPLETE Column stats: NONE value expressions: _col3 (type: bigint), _col4 (type: struct), _col5 (type: double) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vectorization_nested_udf.q.out ql/src/test/results/clientpositive/spark/vectorization_nested_udf.q.out 
index c46fc03..60995b3 100644 --- ql/src/test/results/clientpositive/spark/vectorization_nested_udf.q.out +++ ql/src/test/results/clientpositive/spark/vectorization_nested_udf.q.out @@ -52,10 +52,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0] + valueColumns: 0:bigint Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vectorized_case.q.out ql/src/test/results/clientpositive/spark/vectorized_case.q.out index 58e295d..3d451b8 100644 --- ql/src/test/results/clientpositive/spark/vectorized_case.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_case.q.out @@ -312,10 +312,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1] + valueColumns: 0:bigint, 1:bigint Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized @@ -454,10 +453,9 @@ STAGE PLANS: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator - keyColumnNums: [] native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [0, 1] + valueColumns: 0:bigint, 1:bigint Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE Column stats: NONE value expressions: _col0 (type: bigint), _col1 (type: bigint) Execution mode: vectorized diff --git ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out index 93ab21e..dba53bc 100644 --- ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_mapjoin.q.out @@ -100,6 +100,7 @@ STAGE PLANS: className: VectorMapJoinInnerBigOnlyLongOperator native: true nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true + hashTableImplementationType: OPTIMIZED outputColumnNames: _col0, _col1 input vertices: 1 Map 3 diff --git ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out index edc8f74..6b6cd40 100644 --- ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out +++ ql/src/test/results/clientpositive/spark/vectorized_ptf.q.out @@ -154,11 +154,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink 
Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized @@ -371,10 +371,10 @@ STAGE PLANS: Map-reduce partition columns: p_partkey (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [1, 2, 5] + valueColumns: 1:string, 2:string, 5:int Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_name (type: string), p_mfgr (type: string), p_size (type: int) Execution mode: vectorized @@ -414,10 +414,9 @@ STAGE PLANS: Map-reduce partition columns: p_partkey (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -622,11 +621,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_size (type: int) Execution mode: vectorized @@ -778,11 +777,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized @@ -993,11 +992,11 @@ 
STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_size (type: int) Execution mode: vectorized @@ -1211,11 +1210,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_size (type: int) Execution mode: vectorized @@ -1431,11 +1430,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [0, 3, 4, 5, 6, 7, 8] + partitionColumns: 2:string + valueColumns: 0:int, 3:string, 4:string, 5:int, 6:string, 7:double, 8:string Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_partkey (type: int), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) Execution mode: vectorized @@ -1475,10 +1474,9 @@ STAGE PLANS: Map-reduce partition columns: p_partkey (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -1654,10 +1652,9 @@ STAGE PLANS: Map-reduce partition columns: p_partkey (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 26 Data size: 
16042 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -1689,11 +1686,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [0, 3, 4, 5, 6, 7, 8] + partitionColumns: 2:string + valueColumns: 0:int, 3:string, 4:string, 5:int, 6:string, 7:double, 8:string Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_partkey (type: int), p_brand (type: string), p_type (type: string), p_size (type: int), p_container (type: string), p_retailprice (type: double), p_comment (type: string) Execution mode: vectorized @@ -2264,11 +2261,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized @@ -2478,11 +2475,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized @@ -2761,11 +2758,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized @@ -2979,11 +2976,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: 
VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [0, 5, 7] + partitionColumns: 2:string + valueColumns: 0:int, 5:int, 7:double Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_partkey (type: int), p_size (type: int), p_retailprice (type: double) Execution mode: vectorized @@ -3023,10 +3020,9 @@ STAGE PLANS: Map-reduce partition columns: p_partkey (type: int) Reduce Sink Vectorization: className: VectorReduceSinkLongOperator - keyColumnNums: [0] + keyColumns: 0:int native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - valueColumnNums: [] Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE Execution mode: vectorized Map Vectorization: @@ -3271,11 +3267,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5] + partitionColumns: 2:string + valueColumns: 5:int Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_size (type: int) Execution mode: vectorized @@ -3506,11 +3502,11 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [0, 1] + keyColumns: 0:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [0] - valueColumnNums: [2] + partitionColumns: 0:string + valueColumns: 2:double Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: double) Execution mode: vectorized @@ -3763,11 +3759,11 @@ STAGE PLANS: Map-reduce partition columns: p_mfgr (type: string) Reduce Sink Vectorization: className: VectorReduceSinkObjectHashOperator - keyColumnNums: [2, 1] + keyColumns: 2:string, 1:string native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - partitionColumnNums: [2] - valueColumnNums: [5, 7] + partitionColumns: 2:string + valueColumns: 5:int, 7:double Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE value expressions: p_size (type: int), p_retailprice (type: double) Execution mode: vectorized 
@@ -4218,10 +4214,10 @@ STAGE PLANS:
                   Map-reduce partition columns: p_mfgr (type: string)
                   Reduce Sink Vectorization:
                       className: VectorReduceSinkStringOperator
-                      keyColumnNums: [2]
+                      keyColumns: 2:string
                       native: true
                       nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                      valueColumnNums: [1, 5]
+                      valueColumns: 1:string, 5:int
                   Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
                   value expressions: p_name (type: string), p_size (type: int)
             Execution mode: vectorized
@@ -4531,10 +4527,10 @@ STAGE PLANS:
                   Map-reduce partition columns: p_mfgr (type: string)
                   Reduce Sink Vectorization:
                       className: VectorReduceSinkStringOperator
-                      keyColumnNums: [2]
+                      keyColumns: 2:string
                       native: true
                       nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                      valueColumnNums: [1, 5]
+                      valueColumns: 1:string, 5:int
                   Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
                   value expressions: p_name (type: string), p_size (type: int)
             Execution mode: vectorized
@@ -4839,10 +4835,10 @@ STAGE PLANS:
                   Map-reduce partition columns: p_mfgr (type: string), p_name (type: string)
                   Reduce Sink Vectorization:
                       className: VectorReduceSinkMultiKeyOperator
-                      keyColumnNums: [2, 1]
+                      keyColumns: 2:string, 1:string
                       native: true
                       nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                      valueColumnNums: [5]
+                      valueColumns: 5:int
                   Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
                   value expressions: p_size (type: int)
             Execution mode: vectorized
@@ -5157,10 +5153,10 @@ STAGE PLANS:
                   Map-reduce partition columns: p_mfgr (type: string), p_name (type: string)
                   Reduce Sink Vectorization:
                       className: VectorReduceSinkMultiKeyOperator
-                      keyColumnNums: [2, 1]
+                      keyColumns: 2:string, 1:string
                       native: true
                       nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                      valueColumnNums: [5]
+                      valueColumns: 5:int
                   Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
                   value expressions: p_size (type: int)
             Execution mode: vectorized
@@ -5484,10 +5480,10 @@ STAGE PLANS:
                   Map-reduce partition columns: p_mfgr (type: string), p_name (type: string)
                   Reduce Sink Vectorization:
                       className: VectorReduceSinkMultiKeyOperator
-                      keyColumnNums: [2, 1]
+                      keyColumns: 2:string, 1:string
                       native: true
                       nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                      valueColumnNums: [5]
+                      valueColumns: 5:int
                   Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
                   value expressions: p_size (type: int)
             Execution mode: vectorized
@@ -5781,10 +5777,10 @@ STAGE PLANS:
                   Map-reduce partition columns: p_mfgr (type: string), p_name (type: string)
                   Reduce Sink Vectorization:
                       className: VectorReduceSinkMultiKeyOperator
-                      keyColumnNums: [2, 1]
+                      keyColumns: 2:string, 1:string
                       native: true
                       nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine spark IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                      valueColumnNums: [5]
+                      valueColumns: 5:int
                   Statistics: Num rows: 26 Data size: 16042 Basic stats: COMPLETE Column stats: NONE
                   value expressions: p_size (type: int)
             Execution mode: vectorized
diff --git ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out
index 8da42f4..2679048 100644
--- ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out
+++ ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out
@@ -353,9 +353,9 @@ Stage-3
       Map 1 vectorized
       File Output Operator [FS_4]
         table:{"name:":"default.src_autho_test"}
-        Select Operator [SEL_3] (rows=500/500 width=178)
+        Select Operator [SEL_3] (rows=500/1 width=178)
          Output:["_col0","_col1"]
-         TableScan [TS_0] (rows=500/500 width=178)
+         TableScan [TS_0] (rows=500/1 width=178)
           default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
 Stage-0
   Move Operator
@@ -612,15 +612,15 @@ Stage-0
 Stage-1
   Reducer 2 vectorized
   File Output Operator [FS_10]
-    Limit [LIM_9] (rows=5/5 width=178)
+    Limit [LIM_9] (rows=5/3 width=178)
      Number of rows:5
-     Select Operator [SEL_8] (rows=500/5 width=178)
+     Select Operator [SEL_8] (rows=500/3 width=178)
       Output:["_col0","_col1"]
     <-Map 1 [SIMPLE_EDGE] vectorized
       SHUFFLE [RS_7]
-        Select Operator [SEL_6] (rows=500/500 width=178)
+        Select Operator [SEL_6] (rows=500/1 width=178)
          Output:["_col0","_col1"]
-         TableScan [TS_0] (rows=500/500 width=178)
+         TableScan [TS_0] (rows=500/1 width=178)
           default@src,src,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
 PREHOOK: query: create table orc_merge5 (userid bigint, string1 string, subtype double, decimal1 decimal, ts timestamp) stored as orc
@@ -679,7 +679,7 @@ Stage-3
        Output:["_col0","_col1","_col2","_col3","_col4"]
        Filter Operator [FIL_6] (rows=1/3 width=352)
          predicate:(userid <= 13L)
-         TableScan [TS_0] (rows=1/15000 width=352)
+         TableScan [TS_0] (rows=1/15 width=352)
           default@orc_merge5,orc_merge5,Tbl:COMPLETE,Col:NONE,Output:["userid","string1","subtype","decimal1","ts"]
     PARTITION_ONLY_SHUFFLE [RS_7]
       Select Operator [SEL_6] (rows=1/3 width=352)
@@ -847,23 +847,23 @@ Stage-0
 Stage-1
   Map 2 vectorized
   File Output Operator [FS_34]
-    Select Operator [SEL_33] (rows=391/480 width=186)
+    Select Operator [SEL_33] (rows=391/54 width=186)
      Output:["_col0","_col1","_col2"]
-     Map Join Operator [MAPJOIN_32] (rows=391/480 width=186)
+     Map Join Operator [MAPJOIN_32] (rows=391/54 width=186)
       BucketMapJoin:true,Conds:RS_29._col0=SEL_31._col0(Inner),HybridGraceHashJoin:true,Output:["_col0","_col1","_col3"]
     <-Map 1 [CUSTOM_EDGE] vectorized
       MULTICAST [RS_29]
        PartitionCols:_col0
-       Select Operator [SEL_28] (rows=242/242 width=95)
+       Select Operator [SEL_28] (rows=242/4 width=95)
        Output:["_col0","_col1"]
-       Filter Operator [FIL_27] (rows=242/242 width=95)
+       Filter Operator [FIL_27] (rows=242/4 width=95)
         predicate:key is not null
-        TableScan [TS_0] (rows=242/242 width=95)
+        TableScan [TS_0] (rows=242/4 width=95)
         default@tab,a,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
-    <-Select Operator [SEL_31] (rows=500/500 width=95)
+    <-Select Operator [SEL_31] (rows=500/4 width=95)
      Output:["_col0","_col1"]
-     Filter Operator [FIL_30] (rows=500/500 width=95)
+     Filter Operator [FIL_30] (rows=500/4 width=95)
       predicate:key is not null
-      TableScan [TS_3] (rows=500/500 width=95)
+      TableScan [TS_3] (rows=500/4 width=95)
       default@tab_part,b,Tbl:COMPLETE,Col:COMPLETE,Output:["key","value"]
diff --git ql/src/test/results/clientpositive/tez/explainanalyze_4.q.out ql/src/test/results/clientpositive/tez/explainanalyze_4.q.out
index fd2e95d..3cdd5b3 100644
--- ql/src/test/results/clientpositive/tez/explainanalyze_4.q.out
+++ ql/src/test/results/clientpositive/tez/explainanalyze_4.q.out
@@ -338,7 +338,7 @@ Stage-0
     <-Reducer 2 [SIMPLE_EDGE]
       SHUFFLE [RS_10]
         Map Join Operator [MAPJOIN_17] (rows=1501/10 width=215)
-          Conds:RS_6.KEY.reducesinkkey0=RS_7.KEY.reducesinkkey0(Inner),HybridGraceHashJoin:true,Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"]
+          Conds:RS_6.KEY.reducesinkkey0=RS_7.KEY.reducesinkkey0(Inner),DynamicPartitionHashJoin:true,HybridGraceHashJoin:true,Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"]
         <-Map 4 [CUSTOM_SIMPLE_EDGE]
           PARTITION_ONLY_SHUFFLE [RS_7]
             PartitionCols:_col2
@@ -433,7 +433,7 @@ Stage-0
     <-Reducer 2 [CUSTOM_SIMPLE_EDGE]
       PARTITION_ONLY_SHUFFLE [RS_10]
        Map Join Operator [MAPJOIN_18] (rows=1501/10 width=215)
-          Conds:RS_6.KEY.reducesinkkey0=RS_7.KEY.reducesinkkey0(Inner),HybridGraceHashJoin:true
+          Conds:RS_6.KEY.reducesinkkey0=RS_7.KEY.reducesinkkey0(Inner),DynamicPartitionHashJoin:true,HybridGraceHashJoin:true
        <-Map 4 [CUSTOM_SIMPLE_EDGE]
          PARTITION_ONLY_SHUFFLE [RS_7]
            PartitionCols:_col0
@@ -531,7 +531,7 @@ Stage-0
       SHUFFLE [RS_10]
         PartitionCols:_col0
         Map Join Operator [MAPJOIN_20] (rows=1501/10 width=215)
-          Conds:RS_6.KEY.reducesinkkey0=RS_7.KEY.reducesinkkey0(Inner),HybridGraceHashJoin:true,Output:["_col0"]
+          Conds:RS_6.KEY.reducesinkkey0=RS_7.KEY.reducesinkkey0(Inner),DynamicPartitionHashJoin:true,HybridGraceHashJoin:true,Output:["_col0"]
        <-Map 5 [CUSTOM_SIMPLE_EDGE]
          PARTITION_ONLY_SHUFFLE [RS_7]
            PartitionCols:_col0
diff --git ql/src/test/results/clientpositive/tez/vectorization_limit.q.out ql/src/test/results/clientpositive/tez/vectorization_limit.q.out
index b8a1f90..cc9510b 100644
--- ql/src/test/results/clientpositive/tez/vectorization_limit.q.out
+++ ql/src/test/results/clientpositive/tez/vectorization_limit.q.out
@@ -121,10 +121,10 @@ STAGE PLANS:
                   sort order: ++
                   Reduce Sink Vectorization:
                       className: VectorReduceSinkObjectHashOperator
-                      keyColumnNums: [0, 5]
+                      keyColumns: 0:tinyint, 5:double
                       native: true
                       nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                      valueColumnNums: [1]
+                      valueColumns: 1:smallint
                   Statistics: Num rows: 9173 Data size: 109584 Basic stats: COMPLETE Column stats: COMPLETE
                   TopN Hash Memory Usage: 0.3
                   value expressions: _col2 (type: smallint)
@@ -278,11 +278,11 @@ STAGE PLANS:
                   Map-reduce partition columns: _col0 (type: tinyint)
                   Reduce Sink Vectorization:
                       className: VectorReduceSinkObjectHashOperator
-                      keyColumnNums: [0]
+                      keyColumns: 0:tinyint
                       native: true
                       nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                      partitionColumnNums: [0]
-                      valueColumnNums: [1]
+                      partitionColumns: 0:tinyint
+                      valueColumns: 1:struct
                   Statistics: Num rows: 128 Data size: 10628 Basic stats: COMPLETE Column stats: COMPLETE
                   TopN Hash Memory Usage: 0.3
                   value expressions: _col1 (type: struct)
@@ -439,10 +439,9 @@ STAGE PLANS:
                   Map-reduce partition columns: _col0 (type: tinyint)
                   Reduce Sink Vectorization:
                       className: VectorReduceSinkLongOperator
-                      keyColumnNums: [0]
+                      keyColumns: 0:tinyint
                       native: true
                       nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                      valueColumnNums: []
                   Statistics: Num rows: 128 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE
                   TopN Hash Memory Usage: 0.3
             Execution mode: vectorized
@@ -596,11 +595,10 @@ STAGE PLANS:
                   Map-reduce partition columns: _col0 (type: tinyint)
                   Reduce Sink Vectorization:
                       className: VectorReduceSinkObjectHashOperator
-                      keyColumnNums: [0, 1]
+                      keyColumns: 0:tinyint, 1:double
                       native: true
                       nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                      partitionColumnNums: [0]
-                      valueColumnNums: []
+                      partitionColumns: 0:tinyint
                   Statistics: Num rows: 6144 Data size: 55052 Basic stats: COMPLETE Column stats: COMPLETE
             Execution mode: vectorized
             Map Vectorization:
@@ -797,10 +795,10 @@ STAGE PLANS:
                   Map-reduce partition columns: _col0 (type: double)
                   Reduce Sink Vectorization:
                       className: VectorReduceSinkMultiKeyOperator
-                      keyColumnNums: [0]
+                      keyColumns: 0:double
                       native: true
                       nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                      valueColumnNums: [1]
+                      valueColumns: 1:bigint
                   Statistics: Num rows: 4159 Data size: 58120 Basic stats: COMPLETE Column stats: COMPLETE
                   value expressions: _col1 (type: bigint)
             Execution mode: vectorized
@@ -854,10 +852,9 @@ STAGE PLANS:
                   sort order: ++
                   Reduce Sink Vectorization:
                       className: VectorReduceSinkObjectHashOperator
-                      keyColumnNums: [1, 0]
+                      keyColumns: 1:bigint, 0:double
                       native: true
                       nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                      valueColumnNums: []
                   Statistics: Num rows: 4159 Data size: 58120 Basic stats: COMPLETE Column stats: COMPLETE
                   TopN Hash Memory Usage: 0.3
         Reducer 3
diff --git ql/src/test/results/clientpositive/union_offcbo.q.out ql/src/test/results/clientpositive/union_offcbo.q.out
index a723f00..51b56676 100644
--- ql/src/test/results/clientpositive/union_offcbo.q.out
+++ ql/src/test/results/clientpositive/union_offcbo.q.out
@@ -279,7 +279,7 @@ STAGE PLANS:
       Reduce Operator Tree:
         Join Operator
           condition map:
-               Outer Join 0 to 1
+               Full Outer Join 0 to 1
           keys:
             0 _col8 (type: string)
            1 _col8 (type: string)
@@ -372,7 +372,7 @@ STAGE PLANS:
       Reduce Operator Tree:
         Join Operator
           condition map:
-               Outer Join 0 to 1
+               Full Outer Join 0 to 1
           keys:
             0 _col8 (type: string)
            1 _col8 (type: string)
@@ -959,7 +959,7 @@ STAGE PLANS:
       Reduce Operator Tree:
         Join Operator
          condition map:
-               Outer Join 0 to 1
+               Full Outer Join 0 to 1
          keys:
            0 _col8 (type: string)
            1 _col8 (type: string)
@@ -1063,7 +1063,7 @@ STAGE PLANS:
      Reduce Operator Tree:
        Join Operator
          condition map:
-               Outer Join 0 to 1
+               Full Outer Join 0 to 1
          keys:
            0 _col8 (type: string)
            1 _col8 (type: string)
@@ -1310,7 +1310,7 @@ STAGE PLANS:
      Reduce Operator Tree:
        Join Operator
          condition map:
-               Outer Join 0 to 1
+               Full Outer Join 0 to 1
          keys:
            0 _col8 (type: string)
            1 _col8 (type: string)
@@ -1414,7 +1414,7 @@ STAGE PLANS:
      Reduce Operator Tree:
        Join Operator
          condition map:
-               Outer Join 0 to 1
+               Full Outer Join 0 to 1
          keys:
            0 _col8 (type: string)
            1 _col8 (type: string)
diff --git ql/src/test/results/clientpositive/vector_left_outer_join2.q.out ql/src/test/results/clientpositive/vector_left_outer_join2.q.out
index f1b5627..55d3632 100644
--- ql/src/test/results/clientpositive/vector_left_outer_join2.q.out
+++ ql/src/test/results/clientpositive/vector_left_outer_join2.q.out
@@ -65,6 +65,7 @@ POSTHOOK: Output: default@tjoin1
 POSTHOOK: Lineage: tjoin1.c1 SIMPLE [(tjoin1stage)tjoin1stage.FieldSchema(name:c1, type:int, comment:null), ]
 POSTHOOK: Lineage: tjoin1.c2 EXPRESSION [(tjoin1stage)tjoin1stage.FieldSchema(name:c2, type:char(2), comment:null), ]
 POSTHOOK: Lineage: tjoin1.rnum SIMPLE [(tjoin1stage)tjoin1stage.FieldSchema(name:rnum, type:int, comment:null), ]
+_col0	_col1	_col2
 PREHOOK: query: INSERT INTO TABLE TJOIN2 SELECT * from TJOIN2STAGE
 PREHOOK: type: QUERY
 PREHOOK: Input: default@tjoin2stage
@@ -76,12 +77,14 @@ POSTHOOK: Output: default@tjoin2
 POSTHOOK: Lineage: tjoin2.c1 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c1, type:int, comment:null), ]
 POSTHOOK: Lineage: tjoin2.c2 SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:c2, type:char(2), comment:null), ]
 POSTHOOK: Lineage: tjoin2.rnum SIMPLE [(tjoin2stage)tjoin2stage.FieldSchema(name:rnum, type:int, comment:null), ]
-PREHOOK: query: explain vectorization expression
+tjoin2stage.rnum	tjoin2stage.c1	tjoin2stage.c2
+PREHOOK: query: explain vectorization detail
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization expression
+POSTHOOK: query: explain vectorization detail
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
 POSTHOOK: type: QUERY
+Explain
 PLAN VECTORIZATION:
   enabled: false
   enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
@@ -166,15 +169,17 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@tjoin1
 POSTHOOK: Input: default@tjoin2
 #### A masked pattern was here ####
+tjoin1.rnum	tjoin1.c1	tjoin1.c2	c2j2
 0	10	15	NULL
 1	20	25	NULL
 2	NULL	50	NULL
-PREHOOK: query: explain vectorization expression
+PREHOOK: query: explain vectorization detail
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization expression
+POSTHOOK: query: explain vectorization detail
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
 POSTHOOK: type: QUERY
+Explain
 PLAN VECTORIZATION:
   enabled: false
   enabledConditionsNotMet: [hive.vectorized.execution.enabled IS false]
@@ -259,15 +264,17 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@tjoin1
 POSTHOOK: Input: default@tjoin2
 #### A masked pattern was here ####
+tjoin1.rnum	tjoin1.c1	tjoin1.c2	c2j2
 0	10	15	NULL
 1	20	25	NULL
 2	NULL	50	NULL
-PREHOOK: query: explain vectorization expression
+PREHOOK: query: explain vectorization detail
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization expression
+POSTHOOK: query: explain vectorization detail
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
 POSTHOOK: type: QUERY
+Explain
 PLAN VECTORIZATION:
   enabled: true
   enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
@@ -309,6 +316,7 @@ STAGE PLANS:
                   Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
+                      vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct]
                   Select Operator
                     expressions: rnum (type: int), c1 (type: int), c2 (type: int)
                     outputColumnNames: _col0, _col1, _col2
@@ -363,6 +371,12 @@ STAGE PLANS:
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 3
+                    includeColumns: [0, 1, 2]
+                    dataColumns: rnum:int, c1:int, c2:int
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [string]
             Local Work:
               Map Reduce Local Work
@@ -382,15 +396,17 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@tjoin1
 POSTHOOK: Input: default@tjoin2
 #### A masked pattern was here ####
+tjoin1.rnum	tjoin1.c1	tjoin1.c2	c2j2
 0	10	15	NULL
 1	20	25	NULL
 2	NULL	50	NULL
-PREHOOK: query: explain vectorization expression
+PREHOOK: query: explain vectorization detail
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization expression
+POSTHOOK: query: explain vectorization detail
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
 POSTHOOK: type: QUERY
+Explain
 PLAN VECTORIZATION:
   enabled: true
   enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
@@ -432,6 +448,7 @@ STAGE PLANS:
                   Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
+                      vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct]
                   Select Operator
                     expressions: rnum (type: int), c1 (type: int), c2 (type: int)
                     outputColumnNames: _col0, _col1, _col2
@@ -486,6 +503,12 @@ STAGE PLANS:
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 3
+                    includeColumns: [0, 1, 2]
+                    dataColumns: rnum:int, c1:int, c2:int
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [string]
             Local Work:
              Map Reduce Local Work
@@ -505,15 +528,17 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@tjoin1
 POSTHOOK: Input: default@tjoin2
 #### A masked pattern was here ####
+tjoin1.rnum	tjoin1.c1	tjoin1.c2	c2j2
 0	10	15	NULL
 1	20	25	NULL
 2	NULL	50	NULL
-PREHOOK: query: explain vectorization expression
+PREHOOK: query: explain vectorization detail
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization expression
+POSTHOOK: query: explain vectorization detail
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
 POSTHOOK: type: QUERY
+Explain
 PLAN VECTORIZATION:
   enabled: true
   enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
@@ -555,6 +580,7 @@ STAGE PLANS:
                   Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
+                      vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct]
                   Select Operator
                     expressions: rnum (type: int), c1 (type: int), c2 (type: int)
                     outputColumnNames: _col0, _col1, _col2
@@ -609,6 +635,12 @@ STAGE PLANS:
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 3
+                    includeColumns: [0, 1, 2]
+                    dataColumns: rnum:int, c1:int, c2:int
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [string]
             Local Work:
              Map Reduce Local Work
@@ -628,15 +660,17 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@tjoin1
 POSTHOOK: Input: default@tjoin2
 #### A masked pattern was here ####
+tjoin1.rnum	tjoin1.c1	tjoin1.c2	c2j2
 0	10	15	NULL
 1	20	25	NULL
 2	NULL	50	NULL
-PREHOOK: query: explain vectorization expression
+PREHOOK: query: explain vectorization detail
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
 PREHOOK: type: QUERY
-POSTHOOK: query: explain vectorization expression
+POSTHOOK: query: explain vectorization detail
 select tjoin1.rnum, tjoin1.c1, tjoin1.c2, tjoin2.c2 as c2j2 from tjoin1 left outer join tjoin2 on ( tjoin1.c1 = tjoin2.c1 and tjoin1.c2 > 15 )
 POSTHOOK: type: QUERY
+Explain
 PLAN VECTORIZATION:
   enabled: true
   enabledConditionsMet: [hive.vectorized.execution.enabled IS true]
@@ -678,6 +712,7 @@ STAGE PLANS:
                   Statistics: Num rows: 3 Data size: 32 Basic stats: COMPLETE Column stats: NONE
                   TableScan Vectorization:
                       native: true
+                      vectorizationSchemaColumns: [0:rnum:int, 1:c1:int, 2:c2:int, 3:ROW__ID:struct]
                   Select Operator
                     expressions: rnum (type: int), c1 (type: int), c2 (type: int)
                     outputColumnNames: _col0, _col1, _col2
@@ -732,6 +767,12 @@ STAGE PLANS:
                 allNative: false
                 usesVectorUDFAdaptor: false
                 vectorized: true
+                rowBatchContext:
+                    dataColumnCount: 3
+                    includeColumns: [0, 1, 2]
+                    dataColumns: rnum:int, c1:int, c2:int
+                    partitionColumnCount: 0
+                    scratchColumnTypeNames: [string]
             Local Work:
              Map Reduce Local Work
@@ -751,6 +792,7 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@tjoin1
 POSTHOOK: Input: default@tjoin2
 #### A masked pattern was here ####
+tjoin1.rnum	tjoin1.c1	tjoin1.c2	c2j2
 0	10	15	NULL
 1	20	25	NULL
 2	NULL	50	NULL
diff --git serde/src/test/org/apache/hadoop/hive/serde2/VerifyFast.java serde/src/test/org/apache/hadoop/hive/serde2/VerifyFast.java
index 22aadbb..9cfd8f2 100644
--- serde/src/test/org/apache/hadoop/hive/serde2/VerifyFast.java
+++ serde/src/test/org/apache/hadoop/hive/serde2/VerifyFast.java
@@ -599,8 +599,6 @@ public static Object deserializeReadComplexType(DeserializeRead deserializeRead,
     return getComplexField(deserializeRead, typeInfo);
   }
 
-  static int fake = 0;
-
   private static Object getComplexField(DeserializeRead deserializeRead, TypeInfo typeInfo)
       throws IOException {
     switch (typeInfo.getCategory()) {
diff --git storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
index 5e25c47..953604c 100644
--- storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
+++ storage-api/src/java/org/apache/hadoop/hive/ql/exec/vector/BytesColumnVector.java
@@ -181,7 +181,9 @@ public void setVal(int elementNum, byte[] sourceBuf, int start, int length) {
     if ((nextFree + length) > buffer.length) {
       increaseBufferSpace(length);
     }
-    System.arraycopy(sourceBuf, start, buffer, nextFree, length);
+    if (length > 0) {
+      System.arraycopy(sourceBuf, start, buffer, nextFree, length);
+    }
     vector[elementNum] = buffer;
     this.start[elementNum] = nextFree;
     this.length[elementNum] = length;
diff --git storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampColumnVector.java storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampColumnVector.java
index dcbba7a..3095114 100644
--- storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampColumnVector.java
+++ storage-api/src/test/org/apache/hadoop/hive/ql/exec/vector/TestTimestampColumnVector.java
@@ -38,8 +38,6 @@
 
   private static int TEST_COUNT = 5000;
 
-  private static int fake = 0;
-
   @Test
   public void testSaveAndRetrieve() throws Exception {
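Note on the BytesColumnVector.setVal hunk above: System.arraycopy validates its source array before it looks at the requested length, so the unguarded call throws NullPointerException whenever a caller represents an empty value as (sourceBuf = null, length = 0) -- presumably the kind of empty/non-matched value that starts flowing through once FULL OUTER MapJoin is exercised. The following standalone sketch is not part of the patch (the class name ZeroLengthCopyDemo is made up for illustration); it only demonstrates the JDK behavior that the guard works around:

public class ZeroLengthCopyDemo {
  public static void main(String[] args) {
    byte[] dest = new byte[8];
    byte[] src = null; // an empty value with no backing byte array

    try {
      // Unguarded form: NPE even though zero bytes are requested,
      // because arraycopy null-checks src before checking length.
      System.arraycopy(src, 0, dest, 0, 0);
    } catch (NullPointerException expected) {
      System.out.println("unguarded arraycopy: NPE for length 0");
    }

    // Guarded pattern matching the patched setVal: only dereference
    // the source when there are bytes to copy; a zero-length value is
    // recorded through the start/length bookkeeping alone.
    int length = 0;
    if (length > 0) {
      System.arraycopy(src, 0, dest, 0, length);
    }
    System.out.println("guarded copy: empty value stored safely");
  }
}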